You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/06/21 02:11:07 UTC
[spark] branch master updated: [SPARK-44125][R] Support Java 21 in SparkR
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 95f071cf5f3 [SPARK-44125][R] Support Java 21 in SparkR
95f071cf5f3 is described below
commit 95f071cf5f34d73d193b9c4f28f5459fa92aaeef
Author: Dongjoon Hyun <do...@apache.org>
AuthorDate: Wed Jun 21 11:10:54 2023 +0900
[SPARK-44125][R] Support Java 21 in SparkR
### What changes were proposed in this pull request?
This PR aims to support Java 21 in SparkR. The Arrow-related issues will be fixed when we upgrade the Arrow library. Also, the following JIRA is created to re-enable them even in Java 21.
- SPARK-44127 Reenable `test_sparkSQL_arrow.R` in Java 21
### Why are the changes needed?
To be ready for Java 21.
### Does this PR introduce _any_ user-facing change?
No, this is additional support.
### How was this patch tested?
Pass the CIs and do manual tests.
```
$ java -version
openjdk version "21-ea" 2023-09-19
OpenJDK Runtime Environment (build 21-ea+27-2343)
OpenJDK 64-Bit Server VM (build 21-ea+27-2343, mixed mode, sharing)
$ build/sbt test:package -Psparkr -Phive
$ R/install-dev.sh; R/run-tests.sh
...
══ Skipped ═════════════════════════════════════════════════════════════════════
1. createDataFrame/collect Arrow optimization ('test_sparkSQL_arrow.R:29:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
2. createDataFrame/collect Arrow optimization - many partitions (partition order test) ('test_sparkSQL_arrow.R:47:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
3. createDataFrame/collect Arrow optimization - type specification ('test_sparkSQL_arrow.R:54:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
4. dapply() Arrow optimization ('test_sparkSQL_arrow.R:79:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
5. dapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:114:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
6. dapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:144:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
7. gapply() Arrow optimization ('test_sparkSQL_arrow.R:154:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
8. gapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:198:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
9. gapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:231:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
10. Arrow optimization - unsupported types ('test_sparkSQL_arrow.R:243:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
11. SPARK-32478: gapply() Arrow optimization - error message for schema mismatch ('test_sparkSQL_arrow.R:255:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
12. SPARK-43789: Automatically pick the number of partitions based on Arrow batch size ('test_sparkSQL_arrow.R:265:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
13. sparkJars tag in SparkContext ('test_Windows.R:22:5') - Reason: This test is only for Windows, skipped
══ DONE ════════════════════════════════════════════════════════════════════════
...
* DONE
Status: 2 NOTEs
See
‘/Users/dongjoon/APACHE/spark-merge/R/SparkR.Rcheck/00check.log’
for details.
+ popd
Tests passed.
```
Closes #41680 from dongjoon-hyun/SPARK-44125.
Authored-by: Dongjoon Hyun <do...@apache.org>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
R/pkg/R/client.R | 6 ++++--
R/pkg/tests/fulltests/test_sparkSQL_arrow.R | 24 ++++++++++++++++++++++++
2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 797a5c7da15..88f9e9fe857 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -93,8 +93,10 @@ checkJavaVersion <- function() {
}, javaVersionOut)
javaVersionStr <- strsplit(javaVersionFilter[[1]], '"', fixed = TRUE)[[1L]][2]
- # javaVersionStr is of the form 1.8.0_92/9.0.x/11.0.x.
- # We are using 8, 9, 10, 11 for sparkJavaVersion.
+ # javaVersionStr is of the form 1.8.0_92/11.0.x/17.0.x/21-ea/21
+ # We are using 8, 11, 17, and 21 for sparkJavaVersion.
+ javaVersionStr <- strsplit(javaVersionStr, "-ea", fixed = TRUE)[[1L]]
+
versions <- strsplit(javaVersionStr, ".", fixed = TRUE)[[1L]]
if ("1" == versions[1]) {
javaVersionNum <- as.integer(versions[2])
diff --git a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
index 1ec64077b48..4c3272f9034 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
@@ -26,6 +26,8 @@ sparkSession <- sparkR.session(
test_that("createDataFrame/collect Arrow optimization", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
conf <- callJMethod(sparkSession, "conf")
arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]]
@@ -43,12 +45,16 @@ test_that("createDataFrame/collect Arrow optimization", {
test_that("createDataFrame/collect Arrow optimization - many partitions (partition order test)", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
expect_equal(collect(createDataFrame(mtcars, numPartitions = 32)),
collect(createDataFrame(mtcars, numPartitions = 1)))
})
test_that("createDataFrame/collect Arrow optimization - type specification", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
rdf <- data.frame(list(list(a = 1,
b = "a",
c = TRUE,
@@ -73,6 +79,8 @@ test_that("createDataFrame/collect Arrow optimization - type specification", {
test_that("dapply() Arrow optimization", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
df <- createDataFrame(mtcars)
conf <- callJMethod(sparkSession, "conf")
@@ -107,6 +115,8 @@ test_that("dapply() Arrow optimization", {
test_that("dapply() Arrow optimization - type specification", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
# Note that regular dapply() seems not supporting date and timestamps
# whereas Arrow-optimized dapply() does.
rdf <- data.frame(list(list(a = 1,
@@ -136,6 +146,8 @@ test_that("dapply() Arrow optimization - type specification", {
test_that("dapply() Arrow optimization - type specification (date and timestamp)", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
b = as.POSIXct("1990-02-24 12:34:56"))))
df <- createDataFrame(rdf)
@@ -145,6 +157,8 @@ test_that("dapply() Arrow optimization - type specification (date and timestamp)
test_that("gapply() Arrow optimization", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
df <- createDataFrame(mtcars)
conf <- callJMethod(sparkSession, "conf")
@@ -188,6 +202,8 @@ test_that("gapply() Arrow optimization", {
test_that("gapply() Arrow optimization - type specification", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
# Note that regular gapply() seems not supporting date and timestamps
# whereas Arrow-optimized gapply() does.
rdf <- data.frame(list(list(a = 1,
@@ -220,6 +236,8 @@ test_that("gapply() Arrow optimization - type specification", {
test_that("gapply() Arrow optimization - type specification (date and timestamp)", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
b = as.POSIXct("1990-02-24 12:34:56"))))
df <- createDataFrame(rdf)
@@ -231,6 +249,8 @@ test_that("gapply() Arrow optimization - type specification (date and timestamp)
test_that("Arrow optimization - unsupported types", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
expect_error(checkSchemaInArrow(structType("a FLOAT")), "not support float type")
expect_error(checkSchemaInArrow(structType("a BINARY")), "not support binary type")
@@ -242,6 +262,8 @@ test_that("Arrow optimization - unsupported types", {
test_that("SPARK-32478: gapply() Arrow optimization - error message for schema mismatch", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
df <- createDataFrame(list(list(a = 1L, b = "a")))
expect_error(
@@ -251,6 +273,8 @@ test_that("SPARK-32478: gapply() Arrow optimization - error message for schema m
test_that("SPARK-43789: Automatically pick the number of partitions based on Arrow batch size", {
skip_if_not_installed("arrow")
+ # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+ skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
conf <- callJMethod(sparkSession, "conf")
maxRecordsPerBatch <- sparkR.conf("spark.sql.execution.arrow.maxRecordsPerBatch")[[1]]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org