Posted to commits@spark.apache.org by sa...@apache.org on 2016/09/07 10:22:42 UTC

spark git commit: [SPARK-17339][SPARKR][CORE] Fix some R tests and use Path.toUri in SparkContext for Windows paths in SparkR

Repository: spark
Updated Branches:
  refs/heads/master 3ce3a282c -> 6b41195bc


[SPARK-17339][SPARKR][CORE] Fix some R tests and use Path.toUri in SparkContext for Windows paths in SparkR

## What changes were proposed in this pull request?

This PR fixes Windows path handling issues in several APIs. Please refer to https://issues.apache.org/jira/browse/SPARK-17339 for more details.
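
For reference, a minimal sketch of the failure mode (not part of this patch; the object name and path literal below are hypothetical). `java.net.URI` misreads a Windows drive letter as a URI scheme, while Hadoop's `Path.toUri` parses such paths correctly on Windows:

```scala
import java.net.URI
import org.apache.hadoop.fs.Path

object WindowsPathSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical Windows-style path; forward slashes keep new URI(...)
    // from throwing, which makes the scheme misparse visible.
    val winPath = "C:/Users/spark/data/mllib/sample_lda_data.txt"

    // The drive letter is parsed as a URI scheme, so a downstream call like
    // FileSystem.get(new URI(path), conf) fails along the lines of
    // "No FileSystem for scheme: C". Backslash forms ("C:\...") throw
    // URISyntaxException outright.
    println(new URI(winPath).getScheme)  // prints "C"

    // On Windows, Hadoop's Path recognizes the drive letter and yields a
    // URI whose path component keeps it, e.g. "/C:/Users/...".
    println(new Path(winPath).toUri)
  }
}
```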

## How was this patch tested?

Tested via AppVeyor CI: https://ci.appveyor.com/project/HyukjinKwon/spark/build/82-SPARK-17339-fix-r

Also verified manually:

![2016-09-06 3 14 38](https://cloud.githubusercontent.com/assets/6477701/18263406/b93a98be-7444-11e6-9521-b28ee65a4771.png)

Author: hyukjinkwon <gu...@gmail.com>

Closes #14960 from HyukjinKwon/SPARK-17339.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b41195b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b41195b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b41195b

Branch: refs/heads/master
Commit: 6b41195bca65de6236168d96758f93b85f1dd7ca
Parents: 3ce3a28
Author: hyukjinkwon <gu...@gmail.com>
Authored: Wed Sep 7 19:24:03 2016 +0900
Committer: Kousuke Saruta <sa...@oss.nttdata.co.jp>
Committed: Wed Sep 7 19:24:03 2016 +0900

----------------------------------------------------------------------
 R/pkg/inst/tests/testthat/test_mllib.R                | 14 ++++++++++----
 .../main/scala/org/apache/spark/SparkContext.scala    |  4 ++--
 2 files changed, 12 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/6b41195b/R/pkg/inst/tests/testthat/test_mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index ca25f2c..ac896cf 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -22,6 +22,11 @@ context("MLlib functions")
 # Tests for MLlib functions in SparkR
 sparkSession <- sparkR.session(enableHiveSupport = FALSE)
 
+absoluteSparkPath <- function(x) {
+  sparkHome <- sparkR.conf("spark.home")
+  file.path(sparkHome, x)
+}
+
 test_that("formula of spark.glm", {
   training <- suppressWarnings(createDataFrame(iris))
   # directly calling the spark API
@@ -354,7 +359,8 @@ test_that("spark.kmeans", {
 })
 
 test_that("spark.mlp", {
-  df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+  df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
+                source = "libsvm")
   model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", maxIter = 100,
                      tol = 0.5, stepSize = 1, seed = 1)
 
@@ -616,7 +622,7 @@ test_that("spark.gaussianMixture", {
 })
 
 test_that("spark.lda with libsvm", {
-  text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm")
+  text <- read.df(absoluteSparkPath("data/mllib/sample_lda_libsvm_data.txt"), source = "libsvm")
   model <- spark.lda(text, optimizer = "em")
 
   stats <- summary(model, 10)
@@ -652,7 +658,7 @@ test_that("spark.lda with libsvm", {
 })
 
 test_that("spark.lda with text input", {
-  text <- read.text("data/mllib/sample_lda_data.txt")
+  text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, optimizer = "online", features = "value")
 
   stats <- summary(model)
@@ -688,7 +694,7 @@ test_that("spark.lda with text input", {
 })
 
 test_that("spark.posterior and spark.perplexity", {
-  text <- read.text("data/mllib/sample_lda_data.txt")
+  text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, features = "value", k = 3)
 
   # Assert perplexities are equal

http://git-wip-us.apache.org/repos/asf/spark/blob/6b41195b/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 744d5d0..4aa795a 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -992,7 +992,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
 
     // This is a hack to enforce loading hdfs-site.xml.
     // See SPARK-11227 for details.
-    FileSystem.get(new URI(path), hadoopConfiguration)
+    FileSystem.get(new Path(path).toUri, hadoopConfiguration)
 
     // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
     val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration))
@@ -1081,7 +1081,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
 
     // This is a hack to enforce loading hdfs-site.xml.
     // See SPARK-11227 for details.
-    FileSystem.get(new URI(path), hadoopConfiguration)
+    FileSystem.get(new Path(path).toUri, hadoopConfiguration)
 
     // The call to NewHadoopJob automatically adds security credentials to conf,
     // so we don't need to explicitly add them ourselves
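
As a side note on the change above, a minimal sketch of the fixed call pattern (not part of the commit; the object name and path literal are hypothetical). Path.toUri builds the URI through Hadoop's path parser rather than java.net.URI, so Windows-style paths no longer break the hdfs-site.xml-loading hack:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HdfsSiteHackSketch {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    val path = "C:/Users/spark/data.txt"  // hypothetical Windows path

    // Before the fix: new URI(path) could throw URISyntaxException on
    // Windows paths. After: Path.toUri goes through Hadoop's path parser,
    // which (on Windows) keeps the drive letter in the URI's path part.
    // On a Windows machine this resolves to the default filesystem; the
    // point is only that constructing the URI no longer fails.
    val fs = FileSystem.get(new Path(path).toUri, conf)
    println(fs.getUri)
  }
}
```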

