You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jk...@apache.org on 2016/04/26 22:58:33 UTC
spark git commit: [SPARK-14907][MLLIB] Use repartition in GLMRegressionModel.save
Repository: spark
Updated Branches:
refs/heads/master 7131b03bc -> e4f3eec5b
[SPARK-14907][MLLIB] Use repartition in GLMRegressionModel.save
## What changes were proposed in this pull request?
This PR changes the `GLMRegressionModel.save` function as shown in the following diff, so that it writes its Parquet data in the same way as the other algorithms do.
```
- val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
- // TODO: repartition with 1 partition after SPARK-5532 gets fixed
- dataRDD.write.parquet(Loader.dataPath(path))
+ sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path))
```
## How was this patch tested?
Manual.
Author: Dongjoon Hyun <do...@apache.org>
Closes #12676 from dongjoon-hyun/SPARK-14907.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e4f3eec5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e4f3eec5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e4f3eec5
Branch: refs/heads/master
Commit: e4f3eec5b750389ce3f7c6d023bb7db3b0f8cf29
Parents: 7131b03
Author: Dongjoon Hyun <do...@apache.org>
Authored: Tue Apr 26 13:58:29 2016 -0700
Committer: Joseph K. Bradley <jo...@databricks.com>
Committed: Tue Apr 26 13:58:29 2016 -0700
----------------------------------------------------------------------
.../apache/spark/mllib/regression/impl/GLMRegressionModel.scala | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e4f3eec5/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
index a6e1767..7696fdf 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala
@@ -48,7 +48,6 @@ private[regression] object GLMRegressionModel {
weights: Vector,
intercept: Double): Unit = {
val sqlContext = SQLContext.getOrCreate(sc)
- import sqlContext.implicits._
// Create JSON metadata.
val metadata = compact(render(
@@ -58,9 +57,7 @@ private[regression] object GLMRegressionModel {
// Create Parquet data.
val data = Data(weights, intercept)
- val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF()
- // TODO: repartition with 1 partition after SPARK-5532 gets fixed
- dataRDD.write.parquet(Loader.dataPath(path))
+ sqlContext.createDataFrame(Seq(data)).repartition(1).write.parquet(Loader.dataPath(path))
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org