Posted to commits@spark.apache.org by fe...@apache.org on 2017/09/10 17:24:59 UTC

[9/9] spark git commit: [SPARKR][BACKPORT-2.1] backporting package and test changes

[SPARKR][BACKPORT-2.1] backporting package and test changes

## What changes were proposed in this pull request?

Cherry-pick or manually port the SparkR package and test changes to branch-2.1.

## How was this patch tested?

Jenkins

Author: Felix Cheung <fe...@hotmail.com>
Author: hyukjinkwon <gu...@gmail.com>
Author: Wayne Zhang <ac...@uber.com>

Closes #19165 from felixcheung/rbackportpkg21.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ae4e8ae4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ae4e8ae4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ae4e8ae4

Branch: refs/heads/branch-2.1
Commit: ae4e8ae41d1ba135159afe9ffb1302971343efd1
Parents: 6a8a726
Author: Felix Cheung <fe...@hotmail.com>
Authored: Sun Sep 10 10:24:46 2017 -0700
Committer: Felix Cheung <fe...@apache.org>
Committed: Sun Sep 10 10:24:46 2017 -0700

----------------------------------------------------------------------
 R/pkg/.Rbuildignore                             |    2 +
 R/pkg/DESCRIPTION                               |    2 +-
 R/pkg/R/install.R                               |    6 +-
 R/pkg/inst/tests/testthat/jarTest.R             |   32 -
 R/pkg/inst/tests/testthat/packageInAJarTest.R   |   30 -
 R/pkg/inst/tests/testthat/test_Serde.R          |   79 -
 R/pkg/inst/tests/testthat/test_Windows.R        |   27 -
 R/pkg/inst/tests/testthat/test_basic.R          |   72 +
 R/pkg/inst/tests/testthat/test_binaryFile.R     |   92 -
 .../inst/tests/testthat/test_binary_function.R  |  104 -
 R/pkg/inst/tests/testthat/test_broadcast.R      |   51 -
 R/pkg/inst/tests/testthat/test_client.R         |   43 -
 R/pkg/inst/tests/testthat/test_context.R        |  210 --
 R/pkg/inst/tests/testthat/test_includePackage.R |   60 -
 R/pkg/inst/tests/testthat/test_jvm_api.R        |   36 -
 R/pkg/inst/tests/testthat/test_mllib.R          | 1205 --------
 .../tests/testthat/test_parallelize_collect.R   |  112 -
 R/pkg/inst/tests/testthat/test_rdd.R            |  804 -----
 R/pkg/inst/tests/testthat/test_shuffle.R        |  224 --
 R/pkg/inst/tests/testthat/test_sparkR.R         |   46 -
 R/pkg/inst/tests/testthat/test_sparkSQL.R       | 2883 -----------------
 R/pkg/inst/tests/testthat/test_take.R           |   69 -
 R/pkg/inst/tests/testthat/test_textFile.R       |  164 -
 R/pkg/inst/tests/testthat/test_utils.R          |  242 --
 R/pkg/tests/fulltests/jarTest.R                 |   32 +
 R/pkg/tests/fulltests/packageInAJarTest.R       |   30 +
 R/pkg/tests/fulltests/test_Serde.R              |   79 +
 R/pkg/tests/fulltests/test_Windows.R            |   27 +
 R/pkg/tests/fulltests/test_binaryFile.R         |   92 +
 R/pkg/tests/fulltests/test_binary_function.R    |  104 +
 R/pkg/tests/fulltests/test_broadcast.R          |   51 +
 R/pkg/tests/fulltests/test_client.R             |   43 +
 R/pkg/tests/fulltests/test_context.R            |  210 ++
 R/pkg/tests/fulltests/test_includePackage.R     |   60 +
 R/pkg/tests/fulltests/test_jvm_api.R            |   36 +
 R/pkg/tests/fulltests/test_mllib.R              | 1205 ++++++++
 .../tests/fulltests/test_parallelize_collect.R  |  112 +
 R/pkg/tests/fulltests/test_rdd.R                |  804 +++++
 R/pkg/tests/fulltests/test_shuffle.R            |  224 ++
 R/pkg/tests/fulltests/test_sparkR.R             |   46 +
 R/pkg/tests/fulltests/test_sparkSQL.R           | 2887 ++++++++++++++++++
 R/pkg/tests/fulltests/test_take.R               |   69 +
 R/pkg/tests/fulltests/test_textFile.R           |  164 +
 R/pkg/tests/fulltests/test_utils.R              |  242 ++
 R/pkg/tests/run-all.R                           |   22 +-
 R/pkg/vignettes/sparkr-vignettes.Rmd            |   47 +-
 R/run-tests.sh                                  |    2 +-
 appveyor.yml                                    |    3 +
 .../apache/spark/deploy/SparkSubmitSuite.scala  |    6 +-
 49 files changed, 6653 insertions(+), 6539 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/.Rbuildignore
----------------------------------------------------------------------
diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore
index f12f8c2..280ab58 100644
--- a/R/pkg/.Rbuildignore
+++ b/R/pkg/.Rbuildignore
@@ -6,3 +6,5 @@
 ^README\.Rmd$
 ^src-native$
 ^html$
+^tests/fulltests/*
+
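R CMD build treats each .Rbuildignore entry as a Perl-like regular expression matched against file paths relative to the package root, so this entry keeps the heavyweight fulltests out of the built (CRAN) tarball. A quick sanity check of the pattern, using hypothetical paths:

    # The new pattern excludes fulltests but leaves the testthat tests alone.
    grepl("^tests/fulltests/*",
          c("tests/fulltests/test_rdd.R", "tests/testthat/test_basic.R"))
    # [1]  TRUE FALSE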

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 2d461ca..899d410 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -2,7 +2,7 @@ Package: SparkR
 Type: Package
 Version: 2.1.2
 Title: R Frontend for Apache Spark
-Description: The SparkR package provides an R Frontend for Apache Spark.
+Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
                     email = "shivaram@cs.berkeley.edu"),
              person("Xiangrui", "Meng", role = "aut",

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/R/install.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R
index 4ca7aa6..082ae7d 100644
--- a/R/pkg/R/install.R
+++ b/R/pkg/R/install.R
@@ -270,7 +270,11 @@ sparkCachePath <- function() {
   if (.Platform$OS.type == "windows") {
     winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
     if (is.na(winAppPath)) {
-      stop(paste("%LOCALAPPDATA% not found.",
+      message("%LOCALAPPDATA% not found. Falling back to %USERPROFILE%.")
+      winAppPath <- Sys.getenv("USERPROFILE", unset = NA)
+    }
+    if (is.na(winAppPath)) {
+      stop(paste("%LOCALAPPDATA% and %USERPROFILE% not found.",
                    "Please define the environment variable",
                    "or restart and enter an installation path in localDir."))
     } else {
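For context, the patched lookup order reads as follows when pulled out into a standalone sketch (a hypothetical helper mirroring the hunk above, not the actual sparkCachePath() function):

    # Minimal sketch of the new Windows cache-path lookup, assuming the
    # same environment-variable fallback as the patch above.
    resolveWinAppPath <- function() {
      winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
      if (is.na(winAppPath)) {
        # %LOCALAPPDATA% can be unset on some Windows setups; fall back
        # to the user profile directory instead of failing outright.
        message("%LOCALAPPDATA% not found. Falling back to %USERPROFILE%.")
        winAppPath <- Sys.getenv("USERPROFILE", unset = NA)
      }
      if (is.na(winAppPath)) {
        stop(paste("%LOCALAPPDATA% and %USERPROFILE% not found.",
                   "Please define the environment variable",
                   "or restart and enter an installation path in localDir."))
      }
      winAppPath
    }

The net effect is that stop() is reached only when both variables are unset, rather than on the first miss.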

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/jarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/jarTest.R b/R/pkg/inst/tests/testthat/jarTest.R
deleted file mode 100644
index c9615c8..0000000
--- a/R/pkg/inst/tests/testthat/jarTest.R
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-library(SparkR)
-
-sc <- sparkR.session()
-
-helloTest <- SparkR:::callJStatic("sparkrtest.DummyClass",
-                                  "helloWorld",
-                                  "Dave")
-stopifnot(identical(helloTest, "Hello Dave"))
-
-basicFunction <- SparkR:::callJStatic("sparkrtest.DummyClass",
-                                      "addStuff",
-                                      2L,
-                                      2L)
-stopifnot(basicFunction == 4L)
-
-sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/packageInAJarTest.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/packageInAJarTest.R b/R/pkg/inst/tests/testthat/packageInAJarTest.R
deleted file mode 100644
index 4bc935c..0000000
--- a/R/pkg/inst/tests/testthat/packageInAJarTest.R
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-library(SparkR)
-library(sparkPackageTest)
-
-sparkR.session()
-
-run1 <- myfunc(5L)
-
-run2 <- myfunc(-4L)
-
-sparkR.session.stop()
-
-if (run1 != 6) quit(save = "no", status = 1)
-
-if (run2 != -3) quit(save = "no", status = 1)

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_Serde.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_Serde.R b/R/pkg/inst/tests/testthat/test_Serde.R
deleted file mode 100644
index b5f6f1b..0000000
--- a/R/pkg/inst/tests/testthat/test_Serde.R
+++ /dev/null
@@ -1,79 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("SerDe functionality")
-
-sparkSession <- sparkR.session(enableHiveSupport = FALSE)
-
-test_that("SerDe of primitive types", {
-  x <- callJStatic("SparkRHandler", "echo", 1L)
-  expect_equal(x, 1L)
-  expect_equal(class(x), "integer")
-
-  x <- callJStatic("SparkRHandler", "echo", 1)
-  expect_equal(x, 1)
-  expect_equal(class(x), "numeric")
-
-  x <- callJStatic("SparkRHandler", "echo", TRUE)
-  expect_true(x)
-  expect_equal(class(x), "logical")
-
-  x <- callJStatic("SparkRHandler", "echo", "abc")
-  expect_equal(x, "abc")
-  expect_equal(class(x), "character")
-})
-
-test_that("SerDe of list of primitive types", {
-  x <- list(1L, 2L, 3L)
-  y <- callJStatic("SparkRHandler", "echo", x)
-  expect_equal(x, y)
-  expect_equal(class(y[[1]]), "integer")
-
-  x <- list(1, 2, 3)
-  y <- callJStatic("SparkRHandler", "echo", x)
-  expect_equal(x, y)
-  expect_equal(class(y[[1]]), "numeric")
-
-  x <- list(TRUE, FALSE)
-  y <- callJStatic("SparkRHandler", "echo", x)
-  expect_equal(x, y)
-  expect_equal(class(y[[1]]), "logical")
-
-  x <- list("a", "b", "c")
-  y <- callJStatic("SparkRHandler", "echo", x)
-  expect_equal(x, y)
-  expect_equal(class(y[[1]]), "character")
-
-  # Empty list
-  x <- list()
-  y <- callJStatic("SparkRHandler", "echo", x)
-  expect_equal(x, y)
-})
-
-test_that("SerDe of list of lists", {
-  x <- list(list(1L, 2L, 3L), list(1, 2, 3),
-            list(TRUE, FALSE), list("a", "b", "c"))
-  y <- callJStatic("SparkRHandler", "echo", x)
-  expect_equal(x, y)
-
-  # List of empty lists
-  x <- list(list(), list())
-  y <- callJStatic("SparkRHandler", "echo", x)
-  expect_equal(x, y)
-})
-
-sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_Windows.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R
deleted file mode 100644
index 1d777dd..0000000
--- a/R/pkg/inst/tests/testthat/test_Windows.R
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-context("Windows-specific tests")
-
-test_that("sparkJars tag in SparkContext", {
-  if (.Platform$OS.type != "windows") {
-    skip("This test is only for Windows, skipped")
-  }
-
-  testOutput <- launchScript("ECHO", "a/b/c", wait = TRUE)
-  abcPath <- testOutput[1]
-  expect_equal(abcPath, "a\\b\\c")
-})

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_basic.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_basic.R b/R/pkg/inst/tests/testthat/test_basic.R
new file mode 100644
index 0000000..c092867
--- /dev/null
+++ b/R/pkg/inst/tests/testthat/test_basic.R
@@ -0,0 +1,72 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+context("basic tests for CRAN")
+
+test_that("create DataFrame from list or data.frame", {
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+
+  i <- 4
+  df <- createDataFrame(data.frame(dummy = 1:i))
+  expect_equal(count(df), i)
+
+  l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
+  df <- createDataFrame(l)
+  expect_equal(columns(df), c("a", "b"))
+
+  a <- 1:3
+  b <- c("a", "b", "c")
+  ldf <- data.frame(a, b)
+  df <- createDataFrame(ldf)
+  expect_equal(columns(df), c("a", "b"))
+  expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+  expect_equal(count(df), 3)
+  ldf2 <- collect(df)
+  expect_equal(ldf$a, ldf2$a)
+
+  mtcarsdf <- createDataFrame(mtcars)
+  expect_equivalent(collect(mtcarsdf), mtcars)
+
+  bytes <- as.raw(c(1, 2, 3))
+  df <- createDataFrame(list(list(bytes)))
+  expect_equal(collect(df)[[1]][[1]], bytes)
+
+  sparkR.session.stop()
+})
+
+test_that("spark.glm and predict", {
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+
+  training <- suppressWarnings(createDataFrame(iris))
+  # gaussian family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
+  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+
+  # Gamma family
+  x <- runif(100, -1, 1)
+  y <- rgamma(100, rate = 10 / exp(0.5 + 1.2 * x), shape = 10)
+  df <- as.DataFrame(as.data.frame(list(x = x, y = y)))
+  model <- glm(y ~ x, family = Gamma, df)
+  out <- capture.output(print(summary(model)))
+  expect_true(any(grepl("Dispersion parameter for gamma family", out)))
+
+  sparkR.session.stop()
+})
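These CRAN-facing tests expect a sparkRTestMaster variable that is normally defined by the test runner (R/pkg/tests/run-all.R in the diffstat above). A minimal sketch for exercising the file interactively, assuming SparkR is installed and can locate a Spark distribution:

    library(SparkR)
    library(testthat)

    # Normally provided by run-all.R; set a local master for an ad-hoc run.
    sparkRTestMaster <- "local[2]"
    test_file("R/pkg/inst/tests/testthat/test_basic.R")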

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_binaryFile.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_binaryFile.R b/R/pkg/inst/tests/testthat/test_binaryFile.R
deleted file mode 100644
index b5c279e..0000000
--- a/R/pkg/inst/tests/testthat/test_binaryFile.R
+++ /dev/null
@@ -1,92 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("functions on binary files")
-
-# JavaSparkContext handle
-sparkSession <- sparkR.session(enableHiveSupport = FALSE)
-sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)
-
-mockFile <- c("Spark is pretty.", "Spark is awesome.")
-
-test_that("saveAsObjectFile()/objectFile() following textFile() works", {
-  fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
-  fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
-  writeLines(mockFile, fileName1)
-
-  rdd <- textFile(sc, fileName1, 1)
-  saveAsObjectFile(rdd, fileName2)
-  rdd <- objectFile(sc, fileName2)
-  expect_equal(collectRDD(rdd), as.list(mockFile))
-
-  unlink(fileName1)
-  unlink(fileName2, recursive = TRUE)
-})
-
-test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
-  fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
-
-  l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l, 1)
-  saveAsObjectFile(rdd, fileName)
-  rdd <- objectFile(sc, fileName)
-  expect_equal(collectRDD(rdd), l)
-
-  unlink(fileName, recursive = TRUE)
-})
-
-test_that("saveAsObjectFile()/objectFile() following RDD transformations works", {
-  fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
-  fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
-  writeLines(mockFile, fileName1)
-
-  rdd <- textFile(sc, fileName1)
-
-  words <- flatMap(rdd, function(line) { strsplit(line, " ")[[1]] })
-  wordCount <- lapply(words, function(word) { list(word, 1L) })
-
-  counts <- reduceByKey(wordCount, "+", 2L)
-
-  saveAsObjectFile(counts, fileName2)
-  counts <- objectFile(sc, fileName2)
-
-  output <- collectRDD(counts)
-  expected <- list(list("awesome.", 1), list("Spark", 2), list("pretty.", 1),
-                    list("is", 2))
-  expect_equal(sortKeyValueList(output), sortKeyValueList(expected))
-
-  unlink(fileName1)
-  unlink(fileName2, recursive = TRUE)
-})
-
-test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
-  fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
-  fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
-
-  rdd1 <- parallelize(sc, "Spark is pretty.")
-  saveAsObjectFile(rdd1, fileName1)
-  rdd2 <- parallelize(sc, "Spark is awesome.")
-  saveAsObjectFile(rdd2, fileName2)
-
-  rdd <- objectFile(sc, c(fileName1, fileName2))
-  expect_equal(countRDD(rdd), 2)
-
-  unlink(fileName1, recursive = TRUE)
-  unlink(fileName2, recursive = TRUE)
-})
-
-sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_binary_function.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_binary_function.R b/R/pkg/inst/tests/testthat/test_binary_function.R
deleted file mode 100644
index 59cb2e6..0000000
--- a/R/pkg/inst/tests/testthat/test_binary_function.R
+++ /dev/null
@@ -1,104 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("binary functions")
-
-# JavaSparkContext handle
-sparkSession <- sparkR.session(enableHiveSupport = FALSE)
-sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)
-
-# Data
-nums <- 1:10
-rdd <- parallelize(sc, nums, 2L)
-
-# File content
-mockFile <- c("Spark is pretty.", "Spark is awesome.")
-
-test_that("union on two RDDs", {
-  actual <- collectRDD(unionRDD(rdd, rdd))
-  expect_equal(actual, as.list(rep(nums, 2)))
-
-  fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
-  writeLines(mockFile, fileName)
-
-  text.rdd <- textFile(sc, fileName)
-  union.rdd <- unionRDD(rdd, text.rdd)
-  actual <- collectRDD(union.rdd)
-  expect_equal(actual, c(as.list(nums), mockFile))
-  expect_equal(getSerializedMode(union.rdd), "byte")
-
-  rdd <- map(text.rdd, function(x) {x})
-  union.rdd <- unionRDD(rdd, text.rdd)
-  actual <- collectRDD(union.rdd)
-  expect_equal(actual, as.list(c(mockFile, mockFile)))
-  expect_equal(getSerializedMode(union.rdd), "byte")
-
-  unlink(fileName)
-})
-
-test_that("cogroup on two RDDs", {
-  rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
-  rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
-  cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
-  actual <- collectRDD(cogroup.rdd)
-  expect_equal(actual,
-               list(list(1, list(list(1), list(2, 3))), list(2, list(list(4), list()))))
-
-  rdd1 <- parallelize(sc, list(list("a", 1), list("a", 4)))
-  rdd2 <- parallelize(sc, list(list("b", 2), list("a", 3)))
-  cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
-  actual <- collectRDD(cogroup.rdd)
-
-  expected <- list(list("b", list(list(), list(2))), list("a", list(list(1, 4), list(3))))
-  expect_equal(sortKeyValueList(actual),
-               sortKeyValueList(expected))
-})
-
-test_that("zipPartitions() on RDDs", {
-  rdd1 <- parallelize(sc, 1:2, 2L)  # 1, 2
-  rdd2 <- parallelize(sc, 1:4, 2L)  # 1:2, 3:4
-  rdd3 <- parallelize(sc, 1:6, 2L)  # 1:3, 4:6
-  actual <- collectRDD(zipPartitions(rdd1, rdd2, rdd3,
-                                  func = function(x, y, z) { list(list(x, y, z))} ))
-  expect_equal(actual,
-               list(list(1, c(1, 2), c(1, 2, 3)), list(2, c(3, 4), c(4, 5, 6))))
-
-  mockFile <- c("Spark is pretty.", "Spark is awesome.")
-  fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
-  writeLines(mockFile, fileName)
-
-  rdd <- textFile(sc, fileName, 1)
-  actual <- collectRDD(zipPartitions(rdd, rdd,
-                                  func = function(x, y) { list(paste(x, y, sep = "\n")) }))
-  expected <- list(paste(mockFile, mockFile, sep = "\n"))
-  expect_equal(actual, expected)
-
-  rdd1 <- parallelize(sc, 0:1, 1)
-  actual <- collectRDD(zipPartitions(rdd1, rdd,
-                                  func = function(x, y) { list(x + nchar(y)) }))
-  expected <- list(0:1 + nchar(mockFile))
-  expect_equal(actual, expected)
-
-  rdd <- map(rdd, function(x) { x })
-  actual <- collectRDD(zipPartitions(rdd, rdd1,
-                                  func = function(x, y) { list(y + nchar(x)) }))
-  expect_equal(actual, expected)
-
-  unlink(fileName)
-})
-
-sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_broadcast.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_broadcast.R b/R/pkg/inst/tests/testthat/test_broadcast.R
deleted file mode 100644
index 65f204d..0000000
--- a/R/pkg/inst/tests/testthat/test_broadcast.R
+++ /dev/null
@@ -1,51 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("broadcast variables")
-
-# JavaSparkContext handle
-sparkSession <- sparkR.session(enableHiveSupport = FALSE)
-sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)
-
-# Partitioned data
-nums <- 1:2
-rrdd <- parallelize(sc, nums, 2L)
-
-test_that("using broadcast variable", {
-  randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100))
-  randomMatBr <- broadcast(sc, randomMat)
-
-  useBroadcast <- function(x) {
-    sum(SparkR:::value(randomMatBr) * x)
-  }
-  actual <- collectRDD(lapply(rrdd, useBroadcast))
-  expected <- list(sum(randomMat) * 1, sum(randomMat) * 2)
-  expect_equal(actual, expected)
-})
-
-test_that("without using broadcast variable", {
-  randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100))
-
-  useBroadcast <- function(x) {
-    sum(randomMat * x)
-  }
-  actual <- collectRDD(lapply(rrdd, useBroadcast))
-  expected <- list(sum(randomMat) * 1, sum(randomMat) * 2)
-  expect_equal(actual, expected)
-})
-
-sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_client.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_client.R b/R/pkg/inst/tests/testthat/test_client.R
deleted file mode 100644
index 0cf25fe..0000000
--- a/R/pkg/inst/tests/testthat/test_client.R
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("functions in client.R")
-
-test_that("adding spark-testing-base as a package works", {
-  args <- generateSparkSubmitArgs("", "", "", "",
-                                  "holdenk:spark-testing-base:1.3.0_0.0.5")
-  expect_equal(gsub("[[:space:]]", "", args),
-               gsub("[[:space:]]", "",
-                    "--packages holdenk:spark-testing-base:1.3.0_0.0.5"))
-})
-
-test_that("no package specified doesn't add packages flag", {
-  args <- generateSparkSubmitArgs("", "", "", "", "")
-  expect_equal(gsub("[[:space:]]", "", args),
-               "")
-})
-
-test_that("multiple packages don't produce a warning", {
-  expect_warning(generateSparkSubmitArgs("", "", "", "", c("A", "B")), NA)
-})
-
-test_that("sparkJars sparkPackages as character vectors", {
-  args <- generateSparkSubmitArgs("", "", c("one.jar", "two.jar", "three.jar"), "",
-                                  c("com.databricks:spark-avro_2.10:2.0.1"))
-  expect_match(args, "--jars one.jar,two.jar,three.jar")
-  expect_match(args, "--packages com.databricks:spark-avro_2.10:2.0.1")
-})

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_context.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_context.R b/R/pkg/inst/tests/testthat/test_context.R
deleted file mode 100644
index c847113..0000000
--- a/R/pkg/inst/tests/testthat/test_context.R
+++ /dev/null
@@ -1,210 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("test functions in sparkR.R")
-
-test_that("Check masked functions", {
-  # Check that we are not masking any new function from base, stats, testthat unexpectedly
-  # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it
-  # hard for users to use base R functions. Please check when in doubt.
-  namesOfMaskedCompletely <- c("cov", "filter", "sample")
-  namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var",
-                     "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset",
-                     "summary", "transform", "drop", "window", "as.data.frame", "union")
-  if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
-    namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
-  }
-  masked <- conflicts(detail = TRUE)$`package:SparkR`
-  expect_true("describe" %in% masked)  # only when with testthat..
-  func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] })
-  funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func)
-  maskedBySparkR <- masked[funcSparkROrEmpty]
-  expect_equal(length(maskedBySparkR), length(namesOfMasked))
-  # make the 2 lists the same length so expect_equal will print their content
-  l <- max(length(maskedBySparkR), length(namesOfMasked))
-  length(maskedBySparkR) <- l
-  length(namesOfMasked) <- l
-  expect_equal(sort(maskedBySparkR, na.last = TRUE), sort(namesOfMasked, na.last = TRUE))
-  # above are those reported as masked when `library(SparkR)`
-  # note that many of these methods are still callable without base:: or stats:: prefix
-  # there should be a test for each of these, except followings, which are currently "broken"
-  funcHasAny <- unlist(lapply(masked, function(x) {
-                                        any(grepl("=\"ANY\"", capture.output(showMethods(x)[-1])))
-                                      }))
-  maskedCompletely <- masked[!funcHasAny]
-  expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely))
-  l <- max(length(maskedCompletely), length(namesOfMaskedCompletely))
-  length(maskedCompletely) <- l
-  length(namesOfMaskedCompletely) <- l
-  expect_equal(sort(maskedCompletely, na.last = TRUE),
-               sort(namesOfMaskedCompletely, na.last = TRUE))
-})
-
-test_that("repeatedly starting and stopping SparkR", {
-  for (i in 1:4) {
-    sc <- suppressWarnings(sparkR.init())
-    rdd <- parallelize(sc, 1:20, 2L)
-    expect_equal(countRDD(rdd), 20)
-    suppressWarnings(sparkR.stop())
-  }
-})
-
-test_that("repeatedly starting and stopping SparkSession", {
-  for (i in 1:4) {
-    sparkR.session(enableHiveSupport = FALSE)
-    df <- createDataFrame(data.frame(dummy = 1:i))
-    expect_equal(count(df), i)
-    sparkR.session.stop()
-  }
-})
-
-test_that("rdd GC across sparkR.stop", {
-  sc <- sparkR.sparkContext() # sc should get id 0
-  rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1
-  rdd2 <- parallelize(sc, 1:10, 2L) # rdd2 should get id 2
-  sparkR.session.stop()
-
-  sc <- sparkR.sparkContext() # sc should get id 0 again
-
-  # GC rdd1 before creating rdd3 and rdd2 after
-  rm(rdd1)
-  gc()
-
-  rdd3 <- parallelize(sc, 1:20, 2L) # rdd3 should get id 1 now
-  rdd4 <- parallelize(sc, 1:10, 2L) # rdd4 should get id 2 now
-
-  rm(rdd2)
-  gc()
-
-  countRDD(rdd3)
-  countRDD(rdd4)
-  sparkR.session.stop()
-})
-
-test_that("job group functions can be called", {
-  sc <- sparkR.sparkContext()
-  setJobGroup("groupId", "job description", TRUE)
-  cancelJobGroup("groupId")
-  clearJobGroup()
-
-  suppressWarnings(setJobGroup(sc, "groupId", "job description", TRUE))
-  suppressWarnings(cancelJobGroup(sc, "groupId"))
-  suppressWarnings(clearJobGroup(sc))
-  sparkR.session.stop()
-})
-
-test_that("utility function can be called", {
-  sparkR.sparkContext()
-  setLogLevel("ERROR")
-  sparkR.session.stop()
-})
-
-test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", {
-  e <- new.env()
-  e[["spark.driver.memory"]] <- "512m"
-  ops <- getClientModeSparkSubmitOpts("sparkrmain", e)
-  expect_equal("--driver-memory \"512m\" sparkrmain", ops)
-
-  e[["spark.driver.memory"]] <- "5g"
-  e[["spark.driver.extraClassPath"]] <- "/opt/class_path" # nolint
-  e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings"
-  e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" # nolint
-  e[["random"]] <- "skipthis"
-  ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e)
-  # nolint start
-  expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"",
-                      "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"",
-                      "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell"))
-  # nolint end
-
-  e[["spark.driver.extraClassPath"]] <- "/" # too short
-  ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e)
-  # nolint start
-  expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ",
-                      "-XX:+UseCompressedStrings\" --driver-library-path \"/usr/local/hadoop/lib\"",
-                      " --driver-memory 4g sparkr-shell2"))
-  # nolint end
-})
-
-test_that("sparkJars sparkPackages as comma-separated strings", {
-  expect_warning(processSparkJars(" a, b "))
-  jars <- suppressWarnings(processSparkJars(" a, b "))
-  expect_equal(lapply(jars, basename), list("a", "b"))
-
-  jars <- suppressWarnings(processSparkJars(" abc ,, def "))
-  expect_equal(lapply(jars, basename), list("abc", "def"))
-
-  jars <- suppressWarnings(processSparkJars(c(" abc ,, def ", "", "xyz", " ", "a,b")))
-  expect_equal(lapply(jars, basename), list("abc", "def", "xyz", "a", "b"))
-
-  p <- processSparkPackages(c("ghi", "lmn"))
-  expect_equal(p, c("ghi", "lmn"))
-
-  # check normalizePath
-  f <- dir()[[1]]
-  expect_warning(processSparkJars(f), NA)
-  expect_match(processSparkJars(f), f)
-})
-
-test_that("spark.lapply should perform simple transforms", {
-  sparkR.sparkContext()
-  doubled <- spark.lapply(1:10, function(x) { 2 * x })
-  expect_equal(doubled, as.list(2 * 1:10))
-  sparkR.session.stop()
-})
-
-test_that("add and get file to be downloaded with Spark job on every node", {
-  sparkR.sparkContext()
-  # Test add file.
-  path <- tempfile(pattern = "hello", fileext = ".txt")
-  filename <- basename(path)
-  words <- "Hello World!"
-  writeLines(words, path)
-  spark.addFile(path)
-  download_path <- spark.getSparkFiles(filename)
-  expect_equal(readLines(download_path), words)
-
-  # Test spark.getSparkFiles works well on executors.
-  seq <- seq(from = 1, to = 10, length.out = 5)
-  f <- function(seq) { spark.getSparkFiles(filename) }
-  results <- spark.lapply(seq, f)
-  for (i in 1:5) { expect_equal(basename(results[[i]]), filename) }
-
-  unlink(path)
-
-  # Test add directory recursively.
-  path <- paste0(tempdir(), "/", "recursive_dir")
-  dir.create(path)
-  dir_name <- basename(path)
-  path1 <- paste0(path, "/", "hello.txt")
-  file.create(path1)
-  sub_path <- paste0(path, "/", "sub_hello")
-  dir.create(sub_path)
-  path2 <- paste0(sub_path, "/", "sub_hello.txt")
-  file.create(path2)
-  words <- "Hello World!"
-  sub_words <- "Sub Hello World!"
-  writeLines(words, path1)
-  writeLines(sub_words, path2)
-  spark.addFile(path, recursive = TRUE)
-  download_path1 <- spark.getSparkFiles(paste0(dir_name, "/", "hello.txt"))
-  expect_equal(readLines(download_path1), words)
-  download_path2 <- spark.getSparkFiles(paste0(dir_name, "/", "sub_hello/sub_hello.txt"))
-  expect_equal(readLines(download_path2), sub_words)
-  unlink(path, recursive = TRUE)
-  sparkR.session.stop()
-})

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_includePackage.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_includePackage.R b/R/pkg/inst/tests/testthat/test_includePackage.R
deleted file mode 100644
index 563ea29..0000000
--- a/R/pkg/inst/tests/testthat/test_includePackage.R
+++ /dev/null
@@ -1,60 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("include R packages")
-
-# JavaSparkContext handle
-sparkSession <- sparkR.session(enableHiveSupport = FALSE)
-sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)
-
-# Partitioned data
-nums <- 1:2
-rdd <- parallelize(sc, nums, 2L)
-
-test_that("include inside function", {
-  # Only run the test if plyr is installed.
-  if ("plyr" %in% rownames(installed.packages())) {
-    suppressPackageStartupMessages(library(plyr))
-    generateData <- function(x) {
-      suppressPackageStartupMessages(library(plyr))
-      attach(airquality)
-      result <- transform(Ozone, logOzone = log(Ozone))
-      result
-    }
-
-    data <- lapplyPartition(rdd, generateData)
-    actual <- collectRDD(data)
-  }
-})
-
-test_that("use include package", {
-  # Only run the test if plyr is installed.
-  if ("plyr" %in% rownames(installed.packages())) {
-    suppressPackageStartupMessages(library(plyr))
-    generateData <- function(x) {
-      attach(airquality)
-      result <- transform(Ozone, logOzone = log(Ozone))
-      result
-    }
-
-    includePackage(sc, plyr)
-    data <- lapplyPartition(rdd, generateData)
-    actual <- collectRDD(data)
-  }
-})
-
-sparkR.session.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/ae4e8ae4/R/pkg/inst/tests/testthat/test_jvm_api.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_jvm_api.R b/R/pkg/inst/tests/testthat/test_jvm_api.R
deleted file mode 100644
index 7348c89..0000000
--- a/R/pkg/inst/tests/testthat/test_jvm_api.R
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-context("JVM API")
-
-sparkSession <- sparkR.session(enableHiveSupport = FALSE)
-
-test_that("Create and call methods on object", {
-  jarr <- sparkR.newJObject("java.util.ArrayList")
-  # Add an element to the array
-  sparkR.callJMethod(jarr, "add", 1L)
-  # Check if get returns the same element
-  expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L)
-})
-
-test_that("Call static methods", {
-  # Convert a boolean to a string
-  strTrue <- sparkR.callJStatic("java.lang.String", "valueOf", TRUE)
-  expect_equal(strTrue, "true")
-})
-
-sparkR.session.stop()

