You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by gw...@apache.org on 2017/10/08 00:25:00 UTC
systemml git commit: [SYSTEMML-1929] Update Spark parameters in sparkDML.sh and docs
Repository: systemml
Updated Branches:
refs/heads/master bfb30b3af -> 0505fd38c
[SYSTEMML-1929] Update Spark parameters in sparkDML.sh and docs
Closes #670.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/0505fd38
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/0505fd38
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/0505fd38
Branch: refs/heads/master
Commit: 0505fd38c3191551a14c9b21314b0c3432b47e2f
Parents: bfb30b3
Author: Glenn Weidner <gw...@us.ibm.com>
Authored: Sat Oct 7 17:22:18 2017 -0700
Committer: Glenn Weidner <gw...@us.ibm.com>
Committed: Sat Oct 7 17:22:18 2017 -0700
----------------------------------------------------------------------
docs/algorithms-classification.md | 88 ++++++++++----------
docs/algorithms-clustering.md | 28 +++----
docs/algorithms-descriptive-statistics.md | 28 +++----
docs/algorithms-matrix-factorization.md | 36 ++++----
docs/algorithms-regression.md | 72 ++++++++--------
docs/algorithms-survival-analysis.md | 32 +++----
docs/spark-batch-mode.md | 8 +-
docs/spark-mlcontext-programming-guide.md | 4 +-
.../examples/mnist_lenet_distrib_sgd-train.dml | 2 +-
scripts/perftest/python/run_perftest.py | 1 -
scripts/sparkDML.sh | 17 ++--
src/main/resources/scripts/sparkDML.sh | 17 ++--
12 files changed, 169 insertions(+), 164 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-classification.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-classification.md b/docs/algorithms-classification.md
index 1895103..62e40e7 100644
--- a/docs/algorithms-classification.md
+++ b/docs/algorithms-classification.md
@@ -160,9 +160,9 @@ val prediction = model.transform(X_test_df)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f MultiLogReg.dml
-config SystemML-config.xml
@@ -331,9 +331,9 @@ prediction.show()
Log=/user/ml/log.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f MultiLogReg.dml
-config SystemML-config.xml
@@ -527,9 +527,9 @@ val model = svm.fit(X_train_df)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm.dml
-config SystemML-config.xml
@@ -574,9 +574,9 @@ val prediction = model.transform(X_test_df)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm-predict.dml
-config SystemML-config.xml
@@ -658,9 +658,9 @@ more details on the Python API.
Log=/user/ml/Log.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm.dml
-config SystemML-config.xml
@@ -692,9 +692,9 @@ more details on the Python API.
confusion=/user/ml/confusion.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm-predict.dml
-config SystemML-config.xml
@@ -797,9 +797,9 @@ val model = svm.fit(X_train_df)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f m-svm.dml
-config SystemML-config.xml
@@ -844,9 +844,9 @@ val prediction = model.transform(X_test_df)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f m-svm-predict.dml
-config SystemML-config.xml
@@ -1009,9 +1009,9 @@ prediction.show()
Log=/user/ml/Log.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f m-svm.dml
-config SystemML-config.xml
@@ -1043,9 +1043,9 @@ prediction.show()
confusion=/user/ml/confusion.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f m-svm-predict.dml
-config SystemML-config.xml
@@ -1148,9 +1148,9 @@ val model = nb.fit(X_train_df)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes.dml
-config SystemML-config.xml
@@ -1193,9 +1193,9 @@ val prediction = model.transform(X_test_df)
probabilities=[file]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes-predict.dml
-config SystemML-config.xml
@@ -1284,9 +1284,9 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted')
accuracy=/user/ml/accuracy.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes.dml
-config SystemML-config.xml
@@ -1316,9 +1316,9 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted')
confusion=/user/ml/confusion.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes-predict.dml
-config SystemML-config.xml
@@ -1415,9 +1415,9 @@ implementation is well-suited to handle large-scale data and builds a
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree.dml
-config SystemML-config.xml
@@ -1453,9 +1453,9 @@ implementation is well-suited to handle large-scale data and builds a
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree-predict.dml
-config SystemML-config.xml
@@ -1553,9 +1553,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree.dml
-config SystemML-config.xml
@@ -1588,9 +1588,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree-predict.dml
-config SystemML-config.xml
@@ -1823,9 +1823,9 @@ for classification in parallel.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f random-forest.dml
-config SystemML-config.xml
@@ -1866,9 +1866,9 @@ for classification in parallel.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f random-forest-predict.dml
-config SystemML-config.xml
@@ -1989,9 +1989,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f random-forest.dml
-config SystemML-config.xml
@@ -2027,9 +2027,9 @@ To compute predictions:
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f random-forest-predict.dml
-config SystemML-config.xml
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-clustering.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-clustering.md b/docs/algorithms-clustering.md
index 7554660..358a53a 100644
--- a/docs/algorithms-clustering.md
+++ b/docs/algorithms-clustering.md
@@ -129,9 +129,9 @@ apart is a "false negative" etc.
verb=[boolean]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans.dml
-config SystemML-config.xml
@@ -163,9 +163,9 @@ apart is a "false negative" etc.
O=[file]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
-config SystemML-config.xml
@@ -255,9 +255,9 @@ standard output
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans.dml
-config SystemML-config.xml
@@ -284,9 +284,9 @@ standard output
verb=1
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans.dml
-config SystemML-config.xml
@@ -317,9 +317,9 @@ To predict Y given X and C:
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
-config SystemML-config.xml
@@ -343,9 +343,9 @@ given X and C:
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
-config SystemML-config.xml
@@ -368,9 +368,9 @@ labels prY:
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
-config SystemML-config.xml
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-descriptive-statistics.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-descriptive-statistics.md b/docs/algorithms-descriptive-statistics.md
index f45ffae..1c86368 100644
--- a/docs/algorithms-descriptive-statistics.md
+++ b/docs/algorithms-descriptive-statistics.md
@@ -125,9 +125,9 @@ to compute the mean of a categorical attribute like ‘Hair Color’.
STATS=<file>
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Univar-Stats.dml
-config SystemML-config.xml
@@ -164,9 +164,9 @@ be stored. The format of the output matrix is defined by
STATS=/user/ml/stats.mtx
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Univar-Stats.dml
-config SystemML-config.xml
@@ -585,9 +585,9 @@ attributes like ‘Hair Color’.
OUTDIR=<directory>
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f bivar-stats.dml
-config SystemML-config.xml
@@ -654,9 +654,9 @@ are defined in [**Table 2**](algorithms-descriptive-statistics.html#table2).
OUTDIR=/user/ml/stats.mtx
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f bivar-stats.dml
-config SystemML-config.xml
@@ -1147,9 +1147,9 @@ becomes reversed and amplified (from $+0.1$ to $-0.5$) if we ignore the months.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f stratstats.dml
-config SystemML-config.xml
@@ -1355,9 +1355,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f stratstats.dml
-config SystemML-config.xml
@@ -1383,9 +1383,9 @@ SystemML Language Reference for details.
O=/user/ml/Out.mtx
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f stratstats.dml
-config SystemML-config.xml
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-matrix-factorization.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-matrix-factorization.md b/docs/algorithms-matrix-factorization.md
index 8777130..b559cb5 100644
--- a/docs/algorithms-matrix-factorization.md
+++ b/docs/algorithms-matrix-factorization.md
@@ -56,9 +56,9 @@ top-$K$ (for a given value of $K$) principal components.
OUTPUT=<file>
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f PCA.dml
-config SystemML-config.xml
@@ -119,9 +119,9 @@ SystemML Language Reference for details.
OUTPUT=/user/ml/pca_output/
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f PCA.dml
-config SystemML-config.xml
@@ -149,9 +149,9 @@ SystemML Language Reference for details.
OUTPUT=/user/ml/test_output.mtx
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f PCA.dml
-config SystemML-config.xml
@@ -257,9 +257,9 @@ problems.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f ALS.dml
-config SystemML-config.xml
@@ -291,9 +291,9 @@ problems.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f ALS_predict.dml
-config SystemML-config.xml
@@ -322,9 +322,9 @@ problems.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f ALS_topk_predict.dml
-config SystemML-config.xml
@@ -431,9 +431,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f ALS.dml
-config SystemML-config.xml
@@ -467,9 +467,9 @@ To compute predicted ratings for a given list of users and items:
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f ALS_predict.dml
-config SystemML-config.xml
@@ -501,9 +501,9 @@ predicted ratings for a given list of users:
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f ALS_topk_predict.dml
-config SystemML-config.xml
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-regression.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-regression.md b/docs/algorithms-regression.md
index df2ad3e..18640b8 100644
--- a/docs/algorithms-regression.md
+++ b/docs/algorithms-regression.md
@@ -102,9 +102,9 @@ y_test = lr.fit(df_train)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegDS.dml
-config SystemML-config.xml
@@ -147,9 +147,9 @@ y_test = lr.fit(df_train)
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegCG.dml
-config SystemML-config.xml
@@ -254,9 +254,9 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) -
reg=1.0
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegDS.dml
-config SystemML-config.xml
@@ -311,9 +311,9 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) -
Log=/user/ml/log.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegCG.dml
-config SystemML-config.xml
@@ -552,9 +552,9 @@ lowest AIC is computed.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f StepLinearRegDS.dml
-config SystemML-config.xml
@@ -623,9 +623,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f StepLinearRegDS.dml
-config SystemML-config.xml
@@ -755,9 +755,9 @@ distributions and link functions, see below for details.
mii=[int]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM.dml
-config SystemML-config.xml
@@ -893,9 +893,9 @@ if no maximum limit provided
Log=/user/ml/log.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM.dml
-config SystemML-config.xml
@@ -1230,9 +1230,9 @@ distribution family is supported (see below for details).
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f StepGLM.dml
-config SystemML-config.xml
@@ -1335,9 +1335,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f StepGLM.dml
-config SystemML-config.xml
@@ -1481,9 +1481,9 @@ this step outside the scope of `GLM-predict.dml` for now.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
@@ -1620,9 +1620,9 @@ unknown (which sets it to `1.0`).
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
@@ -1656,9 +1656,9 @@ unknown (which sets it to `1.0`).
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
@@ -1690,9 +1690,9 @@ unknown (which sets it to `1.0`).
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
@@ -1725,9 +1725,9 @@ unknown (which sets it to `1.0`).
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
@@ -1758,9 +1758,9 @@ unknown (which sets it to `1.0`).
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
@@ -1793,9 +1793,9 @@ unknown (which sets it to `1.0`).
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
@@ -1832,9 +1832,9 @@ unknown (which sets it to `1.0`).
O=/user/ml/stats.csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
-config SystemML-config.xml
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-survival-analysis.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-survival-analysis.md b/docs/algorithms-survival-analysis.md
index 239ab08..943d4d7 100644
--- a/docs/algorithms-survival-analysis.md
+++ b/docs/algorithms-survival-analysis.md
@@ -57,9 +57,9 @@ censored and uncensored survival times.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f KM.dml
-config SystemML-config.xml
@@ -152,9 +152,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f KM.dml
-config SystemML-config.xml
@@ -189,9 +189,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f KM.dml
-config SystemML-config.xml
@@ -461,9 +461,9 @@ may be categorical (ordinal or nominal) as well as continuous-valued.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Cox.dml
-config SystemML-config.xml
@@ -503,9 +503,9 @@ may be categorical (ordinal or nominal) as well as continuous-valued.
fmt=[format]
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Cox-predict.dml
-config SystemML-config.xml
@@ -612,9 +612,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Cox.dml
-config SystemML-config.xml
@@ -651,9 +651,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Cox.dml
-config SystemML-config.xml
@@ -691,9 +691,9 @@ SystemML Language Reference for details.
fmt=csv
</div>
<div data-lang="Spark" markdown="1">
- $SPARK_HOME/bin/spark-submit --master yarn-cluster
+ $SPARK_HOME/bin/spark-submit --master yarn
+ --deploy-mode cluster
--conf spark.driver.maxResultSize=0
- --conf spark.akka.frameSize=128
SystemML.jar
-f Cox-predict.dml
-config SystemML-config.xml
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/spark-batch-mode.md
----------------------------------------------------------------------
diff --git a/docs/spark-batch-mode.md b/docs/spark-batch-mode.md
index 7f8f4c0..349f17c 100644
--- a/docs/spark-batch-mode.md
+++ b/docs/spark-batch-mode.md
@@ -41,7 +41,7 @@ mode in more depth.
# Spark Batch Mode Invocation Syntax
-SystemML can be invoked in Hadoop Batch mode using the following syntax:
+SystemML can be invoked in Spark Batch mode using the following syntax:
spark-submit SystemML.jar [-? | -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>)
@@ -63,7 +63,7 @@ to be deprecated. All the primary algorithm scripts included with SystemML use n
# Execution modes
SystemML works seamlessly with all Spark execution modes, including *local* (`--master local[*]`),
-*yarn client* (`--master yarn-client`), *yarn cluster* (`--master yarn-cluster`), *etc*. More
+*yarn client* (`--master yarn --deploy-mode client`), *yarn cluster* (`--master yarn --deploy-mode cluster`), *etc*. More
information on Spark cluster execution modes can be found on the
[official Spark cluster deployment documentation](https://spark.apache.org/docs/latest/cluster-overview.html).
*Note* that Spark can be easily run on a laptop in local mode using the `--master local[*]` described
@@ -71,8 +71,8 @@ above, which SystemML supports.
# Recommended Spark Configuration Settings
-For best performance, we recommend setting the following flags when running SystemML with Spark:
-`--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128`.
+For best performance, we recommend setting the following configuration value when running SystemML with Spark:
+`--conf spark.driver.maxResultSize=0`.
# Examples
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/spark-mlcontext-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/spark-mlcontext-programming-guide.md b/docs/spark-mlcontext-programming-guide.md
index e935c65..63e48be 100644
--- a/docs/spark-mlcontext-programming-guide.md
+++ b/docs/spark-mlcontext-programming-guide.md
@@ -2814,5 +2814,5 @@ plt.title('PNMF Training Loss')
# Recommended Spark Configuration Settings
-For best performance, we recommend setting the following flags when running SystemML with Spark:
-`--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128`.
+For best performance, we recommend setting the following configuration value when running SystemML with Spark:
+`--conf spark.driver.maxResultSize=0`.
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml b/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml
index c397c1f..7243f6a 100644
--- a/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml
+++ b/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml
@@ -62,7 +62,7 @@
# 2. Execute using Spark
# ```
# spark-submit --master local[*] --driver-memory 10G
-# --conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128
+# --conf spark.driver.maxResultSize=0
# $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_lenet_distrib_sgd-train.dml
# -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv
# C=1 Hin=28 Win=28 K=10 batch_size=32 parallel_batches=4 epochs=10
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/scripts/perftest/python/run_perftest.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py
index 7fd40ec..6c016a8 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -299,7 +299,6 @@ if __name__ == '__main__':
# Default Conf
default_conf = 'spark.driver.maxResultSize=0 ' \
- 'spark.akka.frameSize=128 ' \
'spark.network.timeout=6000s ' \
'spark.rpc.askTimeout=6000s ' \
'spark.memory.useLegacyMode=true ' \
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/scripts/sparkDML.sh
----------------------------------------------------------------------
diff --git a/scripts/sparkDML.sh b/scripts/sparkDML.sh
index 7bea639..4a098b1 100755
--- a/scripts/sparkDML.sh
+++ b/scripts/sparkDML.sh
@@ -39,12 +39,13 @@ fi
# Default Values
-master="--master yarn-client"
+master="--master yarn"
+deploy_mode="--deploy-mode client"
driver_memory="--driver-memory 20G"
num_executors="--num-executors 5"
executor_memory="--executor-memory 60G"
executor_cores="--executor-cores 24"
-conf="--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128"
+conf="--conf spark.driver.maxResultSize=0"
# error help print
@@ -58,19 +59,19 @@ Usage: $0 [-h] [SPARK-SUBMIT OPTIONS] -f <dml-filename> [SYSTEMML OPTIONS]
Examples:
$0 -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50
$0 --driver-memory 5G -f GNMF.dml --explain hops -nvargs ...
- $0 --master yarn-cluster -f hdfs:/user/GNMF.dml
+ $0 --master yarn --deploy-mode cluster -f hdfs:/user/GNMF.dml
-h | -? Print this usage message and exit
SPARK-SUBMIT OPTIONS:
--conf <property>=<value> Configuration settings:
spark.driver.maxResultSize Default: 0
- spark.akka.frameSize Default: 128
- --driver-memory <num> Memory for driver (e.g. 512M)] Default: 20G
- --master <string> local | yarn-client | yarn-cluster] Default: yarn-client
+ --driver-memory <num> Memory for driver (e.g. 512M) Default: 20G
+ --master <string> local | yarn Default: yarn
+ --deploy-mode <string> client | cluster Default: client
--num-executors <num> Number of executors to launch (e.g. 2) Default: 5
--executor-memory <num> Memory per executor (e.g. 1G) Default: 60G
- --executor-cores <num> Memory per executor (e.g. ) Default: 24
+ --executor-cores <num> Number of cores per executor (e.g. 1) Default: 24
-f DML script file name, e.g. hdfs:/user/biadmin/test.dml
@@ -90,6 +91,7 @@ while true ; do
case "$1" in
-h) printUsageExit ; exit 1 ;;
--master) master="--master "$2 ; shift 2 ;;
+ --deploy-mode) deploy_mode="--deploy-mode "$2 ; shift 2 ;;
--driver-memory) driver_memory="--driver-memory "$2 ; shift 2 ;;
--num-executors) num_executors="--num-executors "$2 ; shift 2 ;;
--executor-memory) executor_memory="--executor-memory "$2 ; shift 2 ;;
@@ -109,6 +111,7 @@ done
$SPARK_HOME/bin/spark-submit \
${master} \
+ ${deploy_mode} \
${driver_memory} \
${num_executors} \
${executor_memory} \
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/src/main/resources/scripts/sparkDML.sh
----------------------------------------------------------------------
diff --git a/src/main/resources/scripts/sparkDML.sh b/src/main/resources/scripts/sparkDML.sh
index a68d34a..1f1bdd6 100644
--- a/src/main/resources/scripts/sparkDML.sh
+++ b/src/main/resources/scripts/sparkDML.sh
@@ -39,12 +39,13 @@ fi
# Default Values
-master="--master yarn-client"
+master="--master yarn"
+deploy_mode="--deploy-mode client"
driver_memory="--driver-memory 20G"
num_executors="--num-executors 5"
executor_memory="--executor-memory 60G"
executor_cores="--executor-cores 24"
-conf="--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128"
+conf="--conf spark.driver.maxResultSize=0"
# error help print
@@ -58,19 +59,19 @@ Usage: $0 [-h] [SPARK-SUBMIT OPTIONS] -f <dml-filename> [SYSTEMML OPTIONS]
Examples:
$0 -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50
$0 --driver-memory 5G -f GNMF.dml --explain hops -nvargs ...
- $0 --master yarn-cluster -f hdfs:/user/GNMF.dml
+ $0 --master yarn --deploy-mode cluster -f hdfs:/user/GNMF.dml
-h | -? Print this usage message and exit
SPARK-SUBMIT OPTIONS:
--conf <property>=<value> Configuration settings:
spark.driver.maxResultSize Default: 0
- spark.akka.frameSize Default: 128
- --driver-memory <num> Memory for driver (e.g. 512M)] Default: 20G
- --master <string> local | yarn-client | yarn-cluster] Default: yarn-client
+ --driver-memory <num> Memory for driver (e.g. 512M) Default: 20G
+ --master <string> local | yarn Default: yarn
+ --deploy-mode <string> client | cluster Default: client
--num-executors <num> Number of executors to launch (e.g. 2) Default: 5
--executor-memory <num> Memory per executor (e.g. 1G) Default: 60G
- --executor-cores <num> Memory per executor (e.g. ) Default: 24
+ --executor-cores <num> Number of cores per executor (e.g. 1) Default: 24
-f DML script file name, e.g. hdfs:/user/biadmin/test.dml
@@ -90,6 +91,7 @@ while true ; do
case "$1" in
-h) printUsageExit ; exit 1 ;;
--master) master="--master "$2 ; shift 2 ;;
+ --deploy-mode) deploy_mode="--deploy-mode "$2 ; shift 2 ;;
--driver-memory) driver_memory="--driver-memory "$2 ; shift 2 ;;
--num-executors) num_executors="--num-executors "$2 ; shift 2 ;;
--executor-memory) executor_memory="--executor-memory "$2 ; shift 2 ;;
@@ -109,6 +111,7 @@ done
$SPARK_HOME/bin/spark-submit \
${master} \
+ ${deploy_mode} \
${driver_memory} \
${num_executors} \
${executor_memory} \