You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2020/09/26 12:38:49 UTC

[systemds] 01/02: [SYSTEMDS-2675+2676] Python Strings and printing

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 1e7b6d416c5553ae09fe99d7472443dd02c79fad
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Fri Sep 25 19:04:21 2020 +0200

    [SYSTEMDS-2675+2676] Python Strings and printing
    
    Add strings to the Python interface to enable calling SystemDS without
    enforcing a transfer and parsing of the result into Python.
---
 .../python/systemds/context/systemds_context.py    |  5 +-
 .../python/systemds/operator/operation_node.py     |  9 +++-
 src/main/python/systemds/script_building/dag.py    |  1 +
 .../save_log_reg_mnist_sysds.py}                   | 54 +++++++---------------
 .../tests/matrix/{test_write.py => test_print.py}  | 19 +++-----
 src/main/python/tests/matrix/test_write.py         |  7 +++
 6 files changed, 43 insertions(+), 52 deletions(-)

diff --git a/src/main/python/systemds/context/systemds_context.py b/src/main/python/systemds/context/systemds_context.py
index 017d56c..d80fdfa 100644
--- a/src/main/python/systemds/context/systemds_context.py
+++ b/src/main/python/systemds/context/systemds_context.py
@@ -37,7 +37,7 @@ from py4j.protocol import Py4JNetworkError
 from systemds.utils.consts import VALID_INPUT_TYPES
 from systemds.utils.helpers import get_module_dir
 from systemds.operator import OperationNode
-
+from systemds.script_building import OutputType
 
 class SystemDSContext(object):
     """A context with a connection to a java instance with which SystemDS operations are executed. 
@@ -276,3 +276,6 @@ class SystemDSContext(object):
 
     def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]):
         return OperationNode(self, 'read', [f'"{path}"'], named_input_nodes=kwargs)
+
+    def scalar(self, v: VALID_INPUT_TYPES):  # v is a single value, not a dict; it is passed on as the node's operation
+        return OperationNode(self, v, output_type=OutputType.SCALAR)  # SCALAR nodes render as `<var>=<operation>;`
\ No newline at end of file
diff --git a/src/main/python/systemds/operator/operation_node.py b/src/main/python/systemds/operator/operation_node.py
index aba4c63..ebe5804 100644
--- a/src/main/python/systemds/operator/operation_node.py
+++ b/src/main/python/systemds/operator/operation_node.py
@@ -161,6 +161,8 @@ class OperationNode(DAGNode):
             return f'{output}={self.operation}({inputs_comma_sep});'
         elif self.output_type == OutputType.NONE:
             return f'{self.operation}({inputs_comma_sep});'
+        elif self.output_type == OutputType.SCALAR:
+            return f'{var_name}={self.operation};'
         else:
             return f'{var_name}={self.operation}({inputs_comma_sep});'
 
@@ -339,12 +341,17 @@ class OperationNode(DAGNode):
         return OperationNode(self.sds_context, 'moment', unnamed_inputs, output_type=OutputType.DOUBLE)
 
     def write(self, destination: str, format:str = "binary", **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode':
-        
         unnamed_inputs = [self, f'"{destination}"']
         named_parameters = {"format":f'"{format}"'}
         named_parameters.update(kwargs)
         return OperationNode(self.sds_context, 'write', unnamed_inputs, named_parameters, output_type= OutputType.NONE)
 
+    def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode':  # builds a 'toString' node with this node as its only unnamed input
+        return OperationNode(self.sds_context, 'toString', [self], kwargs, output_type= OutputType.DOUBLE)  # NOTE(review): tagged DOUBLE although toString presumably yields text -- confirm
+
+    def print(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode':  # builds a 'print' node with this node as its only unnamed input
+        return OperationNode(self.sds_context, 'print', [self], kwargs, output_type= OutputType.NONE)  # NONE: rendered as a bare call statement without a result variable
+
     def rev(self) -> 'OperationNode':
         """ Reverses the rows in a matrix
 
diff --git a/src/main/python/systemds/script_building/dag.py b/src/main/python/systemds/script_building/dag.py
index 69988f4..9e027e6 100644
--- a/src/main/python/systemds/script_building/dag.py
+++ b/src/main/python/systemds/script_building/dag.py
@@ -33,6 +33,7 @@ if TYPE_CHECKING:
 class OutputType(Enum):
     MATRIX = auto()
     DOUBLE = auto()
+    SCALAR = auto()
     LIST = auto()
     NONE = auto()
 
diff --git a/src/main/python/tests/matrix/test_write.py b/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py
similarity index 55%
copy from src/main/python/tests/matrix/test_write.py
copy to src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py
index 14b0b8f..cbe22a0 100644
--- a/src/main/python/tests/matrix/test_write.py
+++ b/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py
@@ -19,43 +19,21 @@
 #
 # -------------------------------------------------------------
 
-import math
-import os
-import random
-import shutil
-import sys
-import unittest
-
-import numpy as np
-import scipy.stats as st
 from systemds.context import SystemDSContext
 from systemds.matrix import Matrix
-
-
-class TestWrite(unittest.TestCase):
-
-    sds: SystemDSContext = None
-    temp_dir: str = "tests/matrix/temp_write/"
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sds = SystemDSContext()
-
-    @classmethod
-    def tearDownClass(cls):
-        cls.sds.close()
-
-    def tearDown(self):
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_write_01(self):
-        original = np.ones([10, 10])
-        X = Matrix(self.sds, original)
-        X.write(self.temp_dir + "01").compute()
-        NX = self.sds.read(self.temp_dir + "01")
-        res = NX.compute()
-        self.assertTrue(np.allclose(original, res))
-
-
-if __name__ == "__main__":
-    unittest.main(exit=False)
+from systemds.examples.tutorials.mnist import DataManager
+
+d = DataManager()  # project-provided accessor for the MNIST train/test data and labels
+
+base_path = "systemds/examples/tutorials/mnist/"  # path prefix for the four files written below
+with SystemDSContext() as sds:
+    # Write the MNIST train and test splits (data and labels) under base_path.
+    X = Matrix(sds, d.get_train_data().reshape((60000, 28*28)))  # reshape to 60000 rows of 28*28 values
+    X.write(base_path + "train_data").compute()
+    Y = Matrix(sds, d.get_train_labels()) + 1.0  # NOTE(review): +1.0 presumably makes the labels 1-based -- confirm
+    Y.write(base_path + "train_labels").compute()
+    Xt = Matrix(sds, d.get_test_data().reshape((10000, 28*28)))  # reshape to 10000 rows of 28*28 values
+    Xt.write(base_path + "test_data").compute()
+    Yt = Matrix(sds, d.get_test_labels()) + 1.0  # same label shift as for the training labels
+    Yt.write(base_path + "test_labels").compute()
+    
diff --git a/src/main/python/tests/matrix/test_write.py b/src/main/python/tests/matrix/test_print.py
similarity index 75%
copy from src/main/python/tests/matrix/test_write.py
copy to src/main/python/tests/matrix/test_print.py
index 14b0b8f..ee928df 100644
--- a/src/main/python/tests/matrix/test_write.py
+++ b/src/main/python/tests/matrix/test_print.py
@@ -32,10 +32,9 @@ from systemds.context import SystemDSContext
 from systemds.matrix import Matrix
 
 
-class TestWrite(unittest.TestCase):
+class TestPrint(unittest.TestCase):
 
     sds: SystemDSContext = None
-    temp_dir: str = "tests/matrix/temp_write/"
 
     @classmethod
     def setUpClass(cls):
@@ -45,17 +44,13 @@ class TestWrite(unittest.TestCase):
     def tearDownClass(cls):
         cls.sds.close()
 
-    def tearDown(self):
-        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
-    def test_write_01(self):
-        original = np.ones([10, 10])
-        X = Matrix(self.sds, original)
-        X.write(self.temp_dir + "01").compute()
-        NX = self.sds.read(self.temp_dir + "01")
-        res = NX.compute()
-        self.assertTrue(np.allclose(original, res))
+    def test_print_01(self):  # 1x1 matrix -> to_string() -> print(); checks the first captured stdout entry
+        Matrix(self.sds, np.array([1])).to_string().print().compute()
+        self.assertEqual('1.000',self.sds.get_stdout()[0])  # assertEqual: assertEquals is a deprecated alias removed in Python 3.12
 
+    def test_print_02(self):  # scalar node -> print(); checks the first captured stdout entry
+        self.sds.scalar(1).print().compute()
+        self.assertEqual('1', self.sds.get_stdout()[0])  # assertEqual: assertEquals is a deprecated alias removed in Python 3.12
 
 if __name__ == "__main__":
     unittest.main(exit=False)
diff --git a/src/main/python/tests/matrix/test_write.py b/src/main/python/tests/matrix/test_write.py
index 14b0b8f..a8f0d52 100644
--- a/src/main/python/tests/matrix/test_write.py
+++ b/src/main/python/tests/matrix/test_write.py
@@ -56,6 +56,13 @@ class TestWrite(unittest.TestCase):
         res = NX.compute()
         self.assertTrue(np.allclose(original, res))
 
+    def test_write_02(self):  # round-trips a 1x5 row vector through write() and read()
+        original = np.array([[1,2,3,4,5]])
+        X = Matrix(self.sds, original)
+        X.write(self.temp_dir + "02").compute()
+        NX = self.sds.read(self.temp_dir + "02")  # read back from the path just written
+        res = NX.compute()
+        self.assertTrue(np.allclose(original, res))
 
 if __name__ == "__main__":
     unittest.main(exit=False)