You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/10/28 16:50:27 UTC

spark git commit: [SPARK-11369][ML][R] SparkR glm should support setting standardize

Repository: spark
Updated Branches:
  refs/heads/master fd9e345ce -> fba9e9545


[SPARK-11369][ML][R] SparkR glm should support setting standardize

SparkR glm currently supports:
```formula, family = c("gaussian", "binomial"), data, lambda = 0, alpha = 0```
We should also support setting standardize, which has been defined in the [design documentation](https://docs.google.com/document/d/10NZNSEurN2EdWM31uFYsgayIPfCFHiuIu3pCWrUmP_c/edit)

Author: Yanbo Liang <yb...@gmail.com>

Closes #9331 from yanboliang/spark-11369.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fba9e954
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fba9e954
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fba9e954

Branch: refs/heads/master
Commit: fba9e95452ca0a9b589bc14b27c750c69f482b8d
Parents: fd9e345
Author: Yanbo Liang <yb...@gmail.com>
Authored: Wed Oct 28 08:50:21 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Wed Oct 28 08:50:21 2015 -0700

----------------------------------------------------------------------
 R/pkg/R/mllib.R                                                 | 4 ++--
 mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fba9e954/R/pkg/R/mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 25615e8..aadd5b8 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -46,11 +46,11 @@ setClass("PipelineModel", representation(model = "jobj"))
 #'}
 setMethod("glm", signature(formula = "formula", family = "ANY", data = "DataFrame"),
           function(formula, family = c("gaussian", "binomial"), data, lambda = 0, alpha = 0,
-            solver = "auto") {
+            standardize = TRUE, solver = "auto") {
             family <- match.arg(family)
             model <- callJStatic("org.apache.spark.ml.api.r.SparkRWrappers",
                                  "fitRModelFormula", deparse(formula), data@sdf, family, lambda,
-                                 alpha, solver)
+                                 alpha, standardize, solver)
             return(new("PipelineModel", model = model))
           })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fba9e954/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
index fec61fe..21ebf6d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala
@@ -31,6 +31,7 @@ private[r] object SparkRWrappers {
       family: String,
       lambda: Double,
       alpha: Double,
+      standardize: Boolean,
       solver: String): PipelineModel = {
     val formula = new RFormula().setFormula(value)
     val estimator = family match {
@@ -38,11 +39,13 @@ private[r] object SparkRWrappers {
         .setRegParam(lambda)
         .setElasticNetParam(alpha)
         .setFitIntercept(formula.hasIntercept)
+        .setStandardization(standardize)
         .setSolver(solver)
       case "binomial" => new LogisticRegression()
         .setRegParam(lambda)
         .setElasticNetParam(alpha)
         .setFitIntercept(formula.hasIntercept)
+        .setStandardization(standardize)
     }
     val pipeline = new Pipeline().setStages(Array(formula, estimator))
     pipeline.fit(df)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org