You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2016/09/02 00:21:04 UTC

incubator-systemml git commit: [SYSTEMML-878] Update the Python package from SystemML to systemml

Repository: incubator-systemml
Updated Branches:
  refs/heads/gh-pages c08740043 -> 298a9e7b9


[SYSTEMML-878] Update the Python package from SystemML to systemml

- Updated Python package name from SystemML to systemml
- Moved uploadToPyPI.sh script to dev/release
- Updated the documentation

Closes #231.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/298a9e7b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/298a9e7b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/298a9e7b

Branch: refs/heads/gh-pages
Commit: 298a9e7b979296310d8ed5826e6224f77eb96941
Parents: c087400
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Thu Sep 1 16:55:57 2016 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Thu Sep 1 16:55:57 2016 -0700

----------------------------------------------------------------------
 algorithms-classification.md | 18 +++++++-------
 algorithms-regression.md     |  8 +++----
 beginners-guide-python.md    | 49 +++++++++++++++++++++++++++++----------
 3 files changed, 50 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/298a9e7b/algorithms-classification.md
----------------------------------------------------------------------
diff --git a/algorithms-classification.md b/algorithms-classification.md
index 340267c..8d19d04 100644
--- a/algorithms-classification.md
+++ b/algorithms-classification.md
@@ -129,7 +129,7 @@ Eqs. (1) and (2).
 <div class="codetabs">
 <div data-lang="Python" markdown="1">
 {% highlight python %}
-from SystemML.mllearn import LogisticRegression
+from systemml.mllearn import LogisticRegression
 # C = 1/reg
 logistic = LogisticRegression(sqlCtx, fit_intercept=True, max_iter=100, max_inner_iter=0, tol=0.000001, C=1.0)
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix
@@ -237,7 +237,7 @@ SystemML Language Reference for details.
 {% highlight python %}
 # Scikit-learn way
 from sklearn import datasets, neighbors
-from SystemML.mllearn import LogisticRegression
+from systemml.mllearn import LogisticRegression
 from pyspark.sql import SQLContext
 sqlCtx = SQLContext(sc)
 digits = datasets.load_digits()
@@ -253,7 +253,7 @@ print('LogisticRegression score: %f' % logistic.fit(X_train, y_train).score(X_te
 
 # MLPipeline way
 from pyspark.ml import Pipeline
-from SystemML.mllearn import LogisticRegression
+from systemml.mllearn import LogisticRegression
 from pyspark.ml.feature import HashingTF, Tokenizer
 from pyspark.sql import SQLContext
 sqlCtx = SQLContext(sc)
@@ -498,7 +498,7 @@ support vector machine (`y` with domain size `2`).
 <div class="codetabs">
 <div data-lang="Python" markdown="1">
 {% highlight python %}
-from SystemML.mllearn import SVM
+from systemml.mllearn import SVM
 # C = 1/reg
 svm = SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=False)
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix
@@ -766,7 +766,7 @@ class labels.
 <div class="codetabs">
 <div data-lang="Python" markdown="1">
 {% highlight python %}
-from SystemML.mllearn import SVM
+from systemml.mllearn import SVM
 # C = 1/reg
 svm = SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=True)
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix
@@ -916,7 +916,7 @@ SystemML Language Reference for details.
 {% highlight python %}
 # Scikit-learn way
 from sklearn import datasets, neighbors
-from SystemML.mllearn import SVM
+from systemml.mllearn import SVM
 from pyspark.sql import SQLContext
 sqlCtx = SQLContext(sc)
 digits = datasets.load_digits()
@@ -932,7 +932,7 @@ print('LogisticRegression score: %f' % svm.fit(X_train, y_train).score(X_test, y
 
 # MLPipeline way
 from pyspark.ml import Pipeline
-from SystemML.mllearn import SVM
+from systemml.mllearn import SVM
 from pyspark.ml.feature import HashingTF, Tokenizer
 from pyspark.sql import SQLContext
 sqlCtx = SQLContext(sc)
@@ -1122,7 +1122,7 @@ applicable when all features are counts of categorical values.
 <div class="codetabs">
 <div data-lang="Python" markdown="1">
 {% highlight python %}
-from SystemML.mllearn import NaiveBayes
+from systemml.mllearn import NaiveBayes
 nb = NaiveBayes(sqlCtx, laplace=1.0)
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix
 y_test = nb.fit(X_train, y_train)
@@ -1257,7 +1257,7 @@ SystemML Language Reference for details.
 {% highlight python %}
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import TfidfVectorizer
-from SystemML.mllearn import NaiveBayes
+from systemml.mllearn import NaiveBayes
 from sklearn import metrics
 from pyspark.sql import SQLContext
 sqlCtx = SQLContext(sc)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/298a9e7b/algorithms-regression.md
----------------------------------------------------------------------
diff --git a/algorithms-regression.md b/algorithms-regression.md
index 6585b00..992862e 100644
--- a/algorithms-regression.md
+++ b/algorithms-regression.md
@@ -82,7 +82,7 @@ efficient when the number of features $m$ is relatively small
 <div class="codetabs">
 <div data-lang="Python" markdown="1">
 {% highlight python %}
-from SystemML.mllearn import LinearRegression
+from systemml.mllearn import LinearRegression
 # C = 1/reg
 lr = LinearRegression(sqlCtx, fit_intercept=True, C=1.0, solver='direct-solve')
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix
@@ -124,7 +124,7 @@ y_test = lr.fit(df_train)
 <div class="codetabs">
 <div data-lang="Python" markdown="1">
 {% highlight python %}
-from SystemML.mllearn import LinearRegression
+from systemml.mllearn import LinearRegression
 # C = 1/reg
 lr = LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, solver='newton-cg')
 # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames or SciPy Sparse matrices
@@ -222,7 +222,7 @@ SystemML Language Reference for details.
 {% highlight python %}
 import numpy as np
 from sklearn import datasets
-from SystemML.mllearn import LinearRegression
+from systemml.mllearn import LinearRegression
 from pyspark.sql import SQLContext
 # Load the diabetes dataset
 diabetes = datasets.load_diabetes()
@@ -277,7 +277,7 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) -
 {% highlight python %}
 import numpy as np
 from sklearn import datasets
-from SystemML.mllearn import LinearRegression
+from systemml.mllearn import LinearRegression
 from pyspark.sql import SQLContext
 # Load the diabetes dataset
 diabetes = datasets.load_diabetes()

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/298a9e7b/beginners-guide-python.md
----------------------------------------------------------------------
diff --git a/beginners-guide-python.md b/beginners-guide-python.md
index 3b4aeed..f040212 100644
--- a/beginners-guide-python.md
+++ b/beginners-guide-python.md
@@ -72,7 +72,7 @@ brew install apache-spark
 #### Step 1: Install SystemML Python package 
 
 ```bash
-pip install SystemML
+pip install systemml
 ```
 
 #### Step 2: Download SystemML Java binaries
@@ -81,14 +81,14 @@ SystemML Python package downloads the corresponding Java binaries (along with al
 into the installed location. To find the location of the downloaded Java binaries, use the following command:
 
 ```bash
-python -c 'import imp; import os; print os.path.join(imp.find_module("SystemML")[1], "SystemML-java")'
+python -c 'import imp; import os; print os.path.join(imp.find_module("systemml")[1], "systemml-java")'
 ```
 
 #### Step 3: (Optional but recommended) Set SYSTEMML_HOME environment variable
 <div class="codetabs">
 <div data-lang="OSX" markdown="1">
 ```bash
-SYSTEMML_HOME=`python -c 'import imp; import os; print os.path.join(imp.find_module("SystemML")[1], "SystemML-java")'`
+SYSTEMML_HOME=`python -c 'import imp; import os; print os.path.join(imp.find_module("systemml")[1], "systemml-java")'`
 # If you are using zsh or ksh or csh, append it to ~/.zshrc or ~/.profile or ~/.login respectively.
 echo '' >> ~/.bashrc
 echo 'export SYSTEMML_HOME='$SYSTEMML_HOME >> ~/.bashrc
@@ -96,7 +96,7 @@ echo 'export SYSTEMML_HOME='$SYSTEMML_HOME >> ~/.bashrc
 </div>
 <div data-lang="Linux" markdown="1">
 ```bash
-SYSTEMML_HOME=`python -c 'import imp; import os; print os.path.join(imp.find_module("SystemML")[1], "SystemML-java")'`
+SYSTEMML_HOME=`python -c 'import imp; import os; print os.path.join(imp.find_module("systemml")[1], "systemml-java")'`
 # If you are using zsh or ksh or csh, append it to ~/.zshrc or ~/.profile or ~/.login respectively.
 echo '' >> ~/.bashrc
 echo 'export SYSTEMML_HOME='$SYSTEMML_HOME >> ~/.bashrc
@@ -128,7 +128,7 @@ pyspark --master local[*] --driver-class-path $SYSTEMML_HOME"/SystemML.jar"
 To get started with SystemML, let's try few elementary matrix multiplication operations:
 
 ```python
-import SystemML as sml
+import systemml as sml
 import numpy as np
 sml.setSparkContext(sc)
 m1 = sml.matrix(np.ones((3,3)) + 2)
@@ -152,7 +152,7 @@ model: $ \beta = solve(X^T X, X^T y) $. For simplicity, we will use direct-solve
 ```python
 import numpy as np
 from sklearn import datasets
-import SystemML as sml
+import systemml as sml
 from pyspark.sql import SQLContext
 # Load the diabetes dataset
 diabetes = datasets.load_diabetes()
@@ -196,7 +196,7 @@ algorithm.
 ```python
 import numpy as np
 from sklearn import datasets
-from SystemML.mllearn import LinearRegression
+from systemml.mllearn import LinearRegression
 from pyspark.sql import SQLContext
 # Load the diabetes dataset
 diabetes = datasets.load_diabetes()
@@ -230,7 +230,7 @@ algorithm on digits datasets.
 ```python
 # Scikit-learn way
 from sklearn import datasets, neighbors
-from SystemML.mllearn import LogisticRegression
+from systemml.mllearn import LogisticRegression
 from pyspark.sql import SQLContext
 sqlCtx = SQLContext(sc)
 digits = datasets.load_digits()
@@ -245,15 +245,21 @@ logistic = LogisticRegression(sqlCtx)
 print('LogisticRegression score: %f' % logistic.fit(X_train, y_train).score(X_test, y_test))
 ```
 
+Output:
+
+```bash
+LogisticRegression score: 0.922222
+```
+
 ### Passing PySpark DataFrame
 
 To train the above algorithm on larger dataset, we can load the dataset into DataFrame and pass it to the `fit` method:
 
 ```python
 from sklearn import datasets, neighbors
-from SystemML.mllearn import LogisticRegression
+from systemml.mllearn import LogisticRegression
 from pyspark.sql import SQLContext
-import SystemML as sml
+import systemml as sml
 sqlCtx = SQLContext(sc)
 digits = datasets.load_digits()
 X_digits = digits.data
@@ -267,6 +273,12 @@ logistic = LogisticRegression(sqlCtx)
 print('LogisticRegression score: %f' % logistic.fit(df_train).score(X_test, y_test))
 ```
 
+Output:
+
+```bash
+LogisticRegression score: 0.922222
+```
+
 ### MLPipeline interface
 
 In the below example, we demonstrate how the same `LogisticRegression` class can allow SystemML to fit seamlessly into 
@@ -275,7 +287,7 @@ large data pipelines.
 ```python
 # MLPipeline way
 from pyspark.ml import Pipeline
-from SystemML.mllearn import LogisticRegression
+from systemml.mllearn import LogisticRegression
 from pyspark.ml.feature import HashingTF, Tokenizer
 from pyspark.sql import SQLContext
 sqlCtx = SQLContext(sc)
@@ -307,6 +319,19 @@ prediction = model.transform(test)
 prediction.show()
 ```
 
+Output:
+
+```bash
++--+---------------+--------------------+--------------------+--------------------+---+----------+
+|id|           text|               words|            features|         probability| ID|prediction|
++--+---------------+--------------------+--------------------+--------------------+---+----------+
+|12|    spark i j k|ArrayBuffer(spark...|(20,[5,6,7],[2.0,...|[0.99999999999975...|1.0|       1.0|
+|13|          l m n|ArrayBuffer(l, m, n)|(20,[8,9,10],[1.0...|[1.37552128844736...|2.0|       2.0|
+|14|mapreduce spark|ArrayBuffer(mapre...|(20,[5,10],[1.0,1...|[0.99860290938153...|3.0|       1.0|
+|15|  apache hadoop|ArrayBuffer(apach...|(20,[9,14],[1.0,1...|[5.41688748236143...|4.0|       2.0|
++--+---------------+--------------------+--------------------+--------------------+---+----------+
+```
+
 ## Invoking DML/PyDML scripts using MLContext
 
 The below example demonstrates how to invoke the algorithm [scripts/algorithms/MultiLogReg.dml](https://github.com/apache/incubator-systemml/blob/master/scripts/algorithms/MultiLogReg.dml)
@@ -315,7 +340,7 @@ using Python [MLContext API](https://apache.github.io/incubator-systemml/spark-m
 ```python
 from sklearn import datasets, neighbors
 from pyspark.sql import DataFrame, SQLContext
-import SystemML as sml
+import systemml as sml
 import pandas as pd
 import os
 sqlCtx = SQLContext(sc)