You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2022/04/19 19:20:52 UTC

[systemds] branch main updated: [SYSTEMDS-3351] Python Test Docs (algorithm example)

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new ce1dde31ca [SYSTEMDS-3351] Python Test Docs (algorithm example)
ce1dde31ca is described below

commit ce1dde31ca1443dc8049f5143208404d53bf7413
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Tue Apr 19 20:57:25 2022 +0200

    [SYSTEMDS-3351] Python Test Docs (algorithm example)
    
    Closes #1588
---
 .../code/getting_started/simpleExamples/l2svm.py   |  1 -
 .../simpleExamples/l2svm_internal.py               |  1 -
 .../getting_started/simpleExamples/multiply.py     |  1 -
 .../simpleExamples/multiplyMatrix.py               |  5 +-
 .../l2svm.py => guide/algorithms/FullScript.py}    | 37 ++++++------
 .../python/docs/source/guide/algorithms_basics.rst | 65 ++++++----------------
 .../docs/test_algorithms_basics.py}                | 18 ++----
 7 files changed, 40 insertions(+), 88 deletions(-)

diff --git a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
index 1403b68443..75ecc20202 100644
--- a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
+++ b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
@@ -18,7 +18,6 @@
 # under the License.
 #
 # -------------------------------------------------------------
-# Python
 # Import numpy and SystemDS
 import numpy as np
 from systemds.context import SystemDSContext
diff --git a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
index 5a4f24cfd6..a1f32a1892 100644
--- a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
+++ b/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
@@ -18,7 +18,6 @@
 # under the License.
 #
 # -------------------------------------------------------------
-# Python
 # Import SystemDS
 from systemds.context import SystemDSContext
 from systemds.operator.algorithm import l2svm
diff --git a/src/main/python/docs/source/code/getting_started/simpleExamples/multiply.py b/src/main/python/docs/source/code/getting_started/simpleExamples/multiply.py
index b53a35e6e3..69b884e05c 100644
--- a/src/main/python/docs/source/code/getting_started/simpleExamples/multiply.py
+++ b/src/main/python/docs/source/code/getting_started/simpleExamples/multiply.py
@@ -18,7 +18,6 @@
 # under the License.
 #
 # -------------------------------------------------------------
-# Python
 # Import SystemDSContext
 from systemds.context import SystemDSContext
 # Create a context and if necessary (no SystemDS py4j instance running)
diff --git a/src/main/python/docs/source/code/getting_started/simpleExamples/multiplyMatrix.py b/src/main/python/docs/source/code/getting_started/simpleExamples/multiplyMatrix.py
index 7bf7ef99f8..ad56f5d438 100644
--- a/src/main/python/docs/source/code/getting_started/simpleExamples/multiplyMatrix.py
+++ b/src/main/python/docs/source/code/getting_started/simpleExamples/multiplyMatrix.py
@@ -18,10 +18,7 @@
 # under the License.
 #
 # -------------------------------------------------------------
-# Python
-import numpy as np  # import numpy
-
-# Import SystemDSContext
+import numpy as np
 from systemds.context import SystemDSContext
 
 # create a random array
diff --git a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py b/src/main/python/docs/source/code/guide/algorithms/FullScript.py
similarity index 64%
copy from src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
copy to src/main/python/docs/source/code/guide/algorithms/FullScript.py
index 1403b68443..28fbe14cf1 100644
--- a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm.py
+++ b/src/main/python/docs/source/code/guide/algorithms/FullScript.py
@@ -18,28 +18,25 @@
 # under the License.
 #
 # -------------------------------------------------------------
-# Python
-# Import numpy and SystemDS
-import numpy as np
 from systemds.context import SystemDSContext
-from systemds.operator.algorithm import l2svm
+from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
+from systemds.examples.tutorials.mnist import DataManager
 
-# Set a seed
-np.random.seed(0)
-# Generate random features and labels in numpy
-# This can easily be exchanged with a data set.
-features = np.array(np.random.randint(
-    100, size=10 * 10) + 1.01, dtype=np.double)
-features.shape = (10, 10)
-labels = np.zeros((10, 1))
+d = DataManager()
 
-# l2svm labels can only be 0 or 1
-for i in range(10):
-    if np.random.random() > 0.5:
-        labels[i][0] = 1
+X = d.get_train_data().reshape((60000, 28*28))
+Y = d.get_train_labels()
+Xt = d.get_test_data().reshape((10000, 28*28))
+Yt = d.get_test_labels()
 
-# compute our model
 with SystemDSContext() as sds:
-    model = l2svm(sds.from_numpy(features),
-                  sds.from_numpy(labels)).compute()
-    print(model)
+    # Train Data
+    X_ds = sds.from_numpy(X)
+    Y_ds = sds.from_numpy(Y) + 1.0
+    bias = multiLogReg(X_ds, Y_ds, maxi=30)
+    # Test data
+    Xt_ds = sds.from_numpy(Xt)
+    Yt_ds = sds.from_numpy(Yt) + 1.0
+    [m, y_pred, acc] = multiLogRegPredict(Xt_ds, bias, Yt_ds).compute()
+
+print(acc)
diff --git a/src/main/python/docs/source/guide/algorithms_basics.rst b/src/main/python/docs/source/guide/algorithms_basics.rst
index 096d1fa672..6c25b8b39d 100644
--- a/src/main/python/docs/source/guide/algorithms_basics.rst
+++ b/src/main/python/docs/source/guide/algorithms_basics.rst
@@ -38,12 +38,10 @@ Step 1: Get Dataset
 SystemDS provides builtin for downloading and setup of the MNIST dataset.
 To setup this simply use
 
-.. code-block:: python
-
-    from systemds.examples.tutorials.mnist import DataManager
-    d = DataManager()
-    X = d.get_train_data()
-    Y = d.get_train_labels()
+.. include:: ../code/guide/algorithms/FullScript.py
+  :code: python
+  :start-line: 22
+  :end-line: 30
 
 Here the DataManager contains the code for downloading and setting up numpy arrays containing the data.
 
@@ -85,19 +83,12 @@ With these steps we are now ready to train a simple model.
 Step 3: Training
 ----------------
 
-To start with, we setup a SystemDS context
-
-.. code-block:: python
-
-    from systemds.context import SystemDSContext
-    sds = SystemDSContext()
+To start with, we set up a SystemDS context and set up the data:
 
-Then setup the data
-
-.. code-block:: python
-
-    X_ds = sds.from_numpy(X)
-    Y_ds = sds.from_numpy( Y)
+.. include:: ../code/guide/algorithms/FullScript.py
+  :start-line: 31
+  :end-line: 35
+  :code: python
 
 to reduce the training time and verify everything works, it is usually good to reduce the amount of data,
 to train on a smaller sample to start with
@@ -177,18 +168,12 @@ To improve further we have to increase the training data, here for example we in
 from our sample of 1k to the full training dataset of 60k, in this example the maxi is set to reduce the number of iterations the algorithm takes,
 to again reduce training time
 
-.. code-block:: python
-
-    X_ds = sds.from_numpy(X)
-    Y_ds = sds.from_numpy(Y)
-
-    bias = multiLogReg(X_ds, Y_ds, maxi=30)
-
-    [_, _, train_acc] = multiLogRegPredict(X_ds, bias, Y_ds).compute()
-    [_, _, test_acc] = multiLogRegPredict(Xt, bias, Yt).compute()
-    print(train_acc, "  ", test_acc)
+.. include:: ../code/guide/algorithms/FullScript.py
+  :start-line: 31
+  :end-line: 43
+  :code: python
 
-With this change the accuracy achieved changes from the previous value to 92%. This is still low on this dataset as can be seen on `MNIST <http://yann.lecun.com/exdb/mnist/>`_.
+With this change the accuracy achieved changes from the previous value to 92%.
 But this is a basic implementation that can be replaced by a variety of algorithms and techniques.
 
 
@@ -199,23 +184,7 @@ The full script, some steps are combined to reduce the overall script.
 One noteworthy change is the + 1 is done on the matrix ready for SystemDS,
 this makes SystemDS responsible for adding the 1 to each value.
 
-.. code-block:: python
-
-    from systemds.context import SystemDSContext
-    from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
-    from systemds.examples.tutorials.mnist import DataManager
-
-    d = DataManager()
-
-    with SystemDSContext() as sds:
-        # Train Data
-        X = sds.from_numpy(d.get_train_data().reshape((60000, 28*28)))
-        Y = sds.from_numpy(d.get_train_labels()) + 1.0
-        bias = multiLogReg(X, Y, maxi=30)
-        # Test data
-        Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28*28)))
-        Yt = sds.from_numpy(d.get_test_labels()) + 1.0
-        [m, y_pred, acc] = multiLogRegPredict(Xt, bias, Yt).compute()
-
-    print(acc)
+.. include:: ../code/guide/algorithms/FullScript.py
+  :start-line: 20
+  :code: python
 
diff --git a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py b/src/main/python/tests/docs/test_algorithms_basics.py
similarity index 66%
copy from src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
copy to src/main/python/tests/docs/test_algorithms_basics.py
index 5a4f24cfd6..e008361d3a 100644
--- a/src/main/python/docs/source/code/getting_started/simpleExamples/l2svm_internal.py
+++ b/src/main/python/tests/docs/test_algorithms_basics.py
@@ -18,18 +18,10 @@
 # under the License.
 #
 # -------------------------------------------------------------
-# Python
-# Import SystemDS
-from systemds.context import SystemDSContext
-from systemds.operator.algorithm import l2svm
 
-with SystemDSContext() as sds:
-    # Generate 10 by 10 matrix with values in range 0 to 100.
-    features = sds.rand(10, 10, 0, 100)
-    # Add value to all cells in features
-    features += 1.1
-    # Generate labels of all ones and zeros
-    labels = sds.rand(10, 1, 1, 1, sparsity = 0.5)
+import unittest
 
-    model = l2svm(features, labels).compute()
-    print(model)
+
+class TestAlgorithmsBasics(unittest.TestCase):
+    def test_algorithms_script(self):
+        import docs.source.code.guide.algorithms.FullScript