You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/04/01 01:38:46 UTC
[spark] branch master updated: [SPARK-31290][R] Add back the
deprecated R APIs
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fd0b228 [SPARK-31290][R] Add back the deprecated R APIs
fd0b228 is described below
commit fd0b2281272daba590c6bb277688087d0b26053f
Author: Huaxin Gao <hu...@us.ibm.com>
AuthorDate: Wed Apr 1 10:38:03 2020 +0900
[SPARK-31290][R] Add back the deprecated R APIs
### What changes were proposed in this pull request?
Add back the deprecated R APIs removed by https://github.com/apache/spark/pull/22843/ and https://github.com/apache/spark/pull/22815.
These APIs are
- `sparkR.init`
- `sparkRSQL.init`
- `sparkRHive.init`
- `registerTempTable`
- `createExternalTable`
- `dropTempTable`
There is no need to port back wrapper functions such as
```r
createExternalTable <- function(x, ...) {
dispatchFunc("createExternalTable(tableName, path = NULL, source = NULL, ...)", x, ...)
}
```
because, judging from https://github.com/apache/spark/pull/9192, this wrapper only existed for backward compatibility with the SQLContext-based API; it no longer seems necessary since SparkR replaced SQLContext with SparkSession in https://github.com/apache/spark/pull/13635.
### Why are the changes needed?
Amend Spark's Semantic Versioning Policy
### Does this PR introduce any user-facing change?
Yes
The removed R APIs are put back.
### How was this patch tested?
Add back the removed tests
Closes #28058 from huaxingao/r.
Authored-by: Huaxin Gao <hu...@us.ibm.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
R/pkg/NAMESPACE | 7 +++
R/pkg/R/DataFrame.R | 26 ++++++++++
R/pkg/R/catalog.R | 54 +++++++++++++++++++
R/pkg/R/generics.R | 3 ++
R/pkg/R/sparkR.R | 98 +++++++++++++++++++++++++++++++++++
R/pkg/tests/fulltests/test_sparkSQL.R | 13 ++++-
docs/sparkr-migration-guide.md | 3 +-
7 files changed, 200 insertions(+), 4 deletions(-)
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 56eceb8..fb879e4 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u
# S3 methods exported
export("sparkR.session")
+export("sparkR.init")
export("sparkR.session.stop")
export("sparkR.stop")
export("sparkR.conf")
@@ -41,6 +42,9 @@ export("sparkR.callJStatic")
export("install.spark")
+export("sparkRSQL.init",
+ "sparkRHive.init")
+
# MLlib integration
exportMethods("glm",
"spark.glm",
@@ -148,6 +152,7 @@ exportMethods("arrange",
"printSchema",
"randomSplit",
"rbind",
+ "registerTempTable",
"rename",
"repartition",
"repartitionByRange",
@@ -431,8 +436,10 @@ export("as.DataFrame",
"cacheTable",
"clearCache",
"createDataFrame",
+ "createExternalTable",
"createTable",
"currentDatabase",
+ "dropTempTable",
"dropTempView",
"listColumns",
"listDatabases",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 593d3ca..14d2076 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView",
invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
})
+#' (Deprecated) Register Temporary Table
+#'
+#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
+#' @param x A SparkDataFrame
+#' @param tableName A character vector containing the name of the table
+#'
+#' @seealso \link{createOrReplaceTempView}
+#' @rdname registerTempTable-deprecated
+#' @name registerTempTable
+#' @aliases registerTempTable,SparkDataFrame,character-method
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' registerTempTable(df, "json_df")
+#' new_df <- sql("SELECT * FROM json_df")
+#'}
+#' @note registerTempTable since 1.4.0
+setMethod("registerTempTable",
+ signature(x = "SparkDataFrame", tableName = "character"),
+ function(x, tableName) {
+ .Deprecated("createOrReplaceTempView")
+ invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
+ })
+
#' insertInto
#'
#' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index 7641f8a..275737f 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -17,6 +17,35 @@
# catalog.R: SparkSession catalog functions
+#' (Deprecated) Create an external table
+#'
+#' Creates an external table based on the dataset in a data source.
+#' Returns a SparkDataFrame associated with the external table.
+#'
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
+#' "spark.sql.sources.default" will be used.
+#'
+#' @param tableName a name of the table.
+#' @param path the path of files to load.
+#' @param source the name of external data source.
+#' @param schema the schema of the data required for some data sources.
+#' @param ... additional argument(s) forwarded to \code{createTable}.
+#' @return A SparkDataFrame.
+#' @rdname createExternalTable-deprecated
+#' @seealso \link{createTable}
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema)
+#' }
+#' @name createExternalTable
+#' @note createExternalTable since 1.4.0
+createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
+ .Deprecated("createTable", old = "createExternalTable")
+ createTable(tableName, path, source, schema, ...)
+}
+
#' Creates a table based on the dataset in a data source
#'
#' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
@@ -130,6 +159,31 @@ clearCache <- function() {
invisible(callJMethod(catalog, "clearCache"))
}
+#' (Deprecated) Drop Temporary Table
+#'
+#' Drops the temporary table with the given table name in the catalog.
+#' If the table has been cached/persisted before, it's also unpersisted.
+#'
+#' @param tableName The name of the SparkSQL table to be dropped; must be a character string.
+#' @seealso \link{dropTempView}
+#' @rdname dropTempTable-deprecated
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' df <- read.df(path, "parquet")
+#' createOrReplaceTempView(df, "table")
+#' dropTempTable("table")
+#' }
+#' @name dropTempTable
+#' @note dropTempTable since 1.4.0
+dropTempTable <- function(tableName) {
+ .Deprecated("dropTempView", old = "dropTempTable")
+ if (!is.character(tableName)) {
+ stop("tableName must be a string.")
+ }
+ dropTempView(tableName)
+}
+
#' Drops the temporary view with the given view name in the catalog.
#'
#' Drops the temporary view with the given view name in the catalog.
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6f6ef6f..d924b2a 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -528,6 +528,9 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
#' @rdname printSchema
setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
+#' @rdname registerTempTable-deprecated
+setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") })
+
#' @rdname rename
setGeneric("rename", function(x, ...) { standardGeneric("rename") })
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index cdb5909..cc8c92b 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -88,6 +88,49 @@ sparkR.stop <- function() {
sparkR.session.stop()
}
+#' (Deprecated) Initialize a new Spark Context
+#'
+#' This function initializes a new SparkContext.
+#'
+#' @param master The Spark master URL
+#' @param appName Application name to register with cluster manager
+#' @param sparkHome Spark Home directory
+#' @param sparkEnvir Named list of environment variables to set on worker nodes
+#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
+#' @param sparkJars Character vector of jar files to pass to the worker nodes
+#' @param sparkPackages Character vector of package coordinates
+#' @seealso \link{sparkR.session}
+#' @rdname sparkR.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
+#' list(spark.executor.memory="1g"))
+#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
+#' list(spark.executor.memory="4g"),
+#' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
+#' c("one.jar", "two.jar", "three.jar"),
+#' c("com.databricks:spark-avro_2.11:2.0.1"))
+#'}
+#' @note sparkR.init since 1.4.0
+sparkR.init <- function(
+ master = "",
+ appName = "SparkR",
+ sparkHome = Sys.getenv("SPARK_HOME"),
+ sparkEnvir = list(),
+ sparkExecutorEnv = list(),
+ sparkJars = "",
+ sparkPackages = "") {
+ .Deprecated("sparkR.session")
+ sparkR.sparkContext(master,
+ appName,
+ sparkHome,
+ convertNamedListToEnv(sparkEnvir),
+ convertNamedListToEnv(sparkExecutorEnv),
+ sparkJars,
+ sparkPackages)
+}
+
# Internal function to handle creating the SparkContext.
sparkR.sparkContext <- function(
master = "",
@@ -229,6 +272,61 @@ sparkR.sparkContext <- function(
sc
}
+#' (Deprecated) Initialize a new SQLContext
+#'
+#' This function creates a SparkContext from an existing JavaSparkContext and
+#' then uses it to initialize a new SQLContext
+#'
+#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with sparkR.init(); not used by this function
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRSQL.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#'}
+#' @note sparkRSQL.init since 1.4.0
+sparkRSQL.init <- function(jsc = NULL) {
+ .Deprecated("sparkR.session")
+
+ if (exists(".sparkRsession", envir = .sparkREnv)) {
+ return(get(".sparkRsession", envir = .sparkREnv))
+ }
+
+ # Default to without Hive support for backward compatibility.
+ sparkR.session(enableHiveSupport = FALSE)
+}
+
+#' (Deprecated) Initialize a new HiveContext
+#'
+#' This function creates a HiveContext from an existing JavaSparkContext
+#'
+#' Starting SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with sparkR.init(); not used by this function
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRHive.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRHive.init(sc)
+#'}
+#' @note sparkRHive.init since 1.4.0
+sparkRHive.init <- function(jsc = NULL) {
+ .Deprecated("sparkR.session")
+
+ if (exists(".sparkRsession", envir = .sparkREnv)) {
+ return(get(".sparkRsession", envir = .sparkREnv))
+ }
+
+ # Unlike sparkRSQL.init, create the session with Hive support enabled.
+ sparkR.session(enableHiveSupport = TRUE)
+}
+
#' Get the existing SparkSession or initialize a new SparkSession.
#'
#' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index a552729..c892feb 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -106,6 +106,15 @@ if (is_windows()) {
Sys.setenv(TZ = "GMT")
}
+test_that("calling sparkRSQL.init returns existing SQL context", {
+ sqlContext <- suppressWarnings(sparkRSQL.init(sc))
+ expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext)
+})
+
+test_that("calling sparkRSQL.init returns existing SparkSession", {
+ expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession)
+})
+
test_that("calling sparkR.session returns existing SparkSession", {
expect_equal(sparkR.session(), sparkSession)
})
@@ -656,10 +665,10 @@ test_that("test tableNames and tables", {
expect_true("tableName" %in% colnames(tables()))
expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))
- createOrReplaceTempView(df, "table2")
+ suppressWarnings(registerTempTable(df, "table2"))
tables <- listTables()
expect_equal(count(tables), count + 2)
- dropTempView("table1")
+ suppressWarnings(dropTempTable("table1"))
expect_true(dropTempView("table2"))
tables <- listTables()
diff --git a/docs/sparkr-migration-guide.md b/docs/sparkr-migration-guide.md
index 6fbc4c0..32836cd 100644
--- a/docs/sparkr-migration-guide.md
+++ b/docs/sparkr-migration-guide.md
@@ -28,8 +28,7 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.
## Upgrading from SparkR 2.4 to 3.0
- - The deprecated methods `sparkR.init`, `sparkRSQL.init`, `sparkRHive.init` have been removed. Use `sparkR.session` instead.
- - The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `registerTempTable`, `createExternalTable`, and `dropTempTable` have been removed. Use `read.parquet`, `write.parquet`, `read.json`, `createOrReplaceTempView`, `createTable`, `dropTempView`, `union` instead.
+ - The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `jsonRDD` have been removed. Use `read.parquet`, `write.parquet`, `read.json` instead.
## Upgrading from SparkR 2.3 to 2.4
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org