Posted to commits@spark.apache.org by gu...@apache.org on 2023/06/21 02:11:07 UTC

[spark] branch master updated: [SPARK-44125][R] Support Java 21 in SparkR

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 95f071cf5f3 [SPARK-44125][R] Support Java 21 in SparkR
95f071cf5f3 is described below

commit 95f071cf5f34d73d193b9c4f28f5459fa92aaeef
Author: Dongjoon Hyun <do...@apache.org>
AuthorDate: Wed Jun 21 11:10:54 2023 +0900

    [SPARK-44125][R] Support Java 21 in SparkR
    
    ### What changes were proposed in this pull request?
    
    This PR aims to support Java 21 in SparkR. The Arrow-related test failures will be fixed when we upgrade the Arrow library; until then, the affected tests are skipped on Java 21 (see the guard sketch below), and the following JIRA was created to re-enable them:
    - SPARK-44127 Reenable `test_sparkSQL_arrow.R` in Java 21
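
    Concretely, each Arrow test in `test_sparkSQL_arrow.R` is guarded as in this sketch, which mirrors the diff in this commit; `skip_if()` and `skip_if_not_installed()` come from testthat, and `sparkR.callJStatic()` resolves the JVM-side helper through the SparkR bridge:

    ```
    # Guard placed at the top of each Arrow test case: skip when the
    # arrow package is not installed or when the JVM reports Java 21+.
    skip_if_not_installed("arrow")
    # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
    skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
    ```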
    
    ### Why are the changes needed?
    
    To be ready for Java 21.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, this is additional support.
    
    ### How was this patch tested?
    
    Pass the CIs and the following manual tests:
    
    ```
    $ java -version
    openjdk version "21-ea" 2023-09-19
    OpenJDK Runtime Environment (build 21-ea+27-2343)
    OpenJDK 64-Bit Server VM (build 21-ea+27-2343, mixed mode, sharing)
    
    $ build/sbt test:package -Psparkr -Phive
    
    $ R/install-dev.sh; R/run-tests.sh
    ...
    ══ Skipped ═════════════════════════════════════════════════════════════════════
    1. createDataFrame/collect Arrow optimization ('test_sparkSQL_arrow.R:29:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    2. createDataFrame/collect Arrow optimization - many partitions (partition order test) ('test_sparkSQL_arrow.R:47:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    3. createDataFrame/collect Arrow optimization - type specification ('test_sparkSQL_arrow.R:54:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    4. dapply() Arrow optimization ('test_sparkSQL_arrow.R:79:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    5. dapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:114:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    6. dapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:144:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    7. gapply() Arrow optimization ('test_sparkSQL_arrow.R:154:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    8. gapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:198:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    9. gapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:231:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    10. Arrow optimization - unsupported types ('test_sparkSQL_arrow.R:243:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    11. SPARK-32478: gapply() Arrow optimization - error message for schema mismatch ('test_sparkSQL_arrow.R:255:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    12. SPARK-43789: Automatically pick the number of partitions based on Arrow batch size ('test_sparkSQL_arrow.R:265:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE
    
    13. sparkJars tag in SparkContext ('test_Windows.R:22:5') - Reason: This test is only for Windows, skipped
    
    ══ DONE ════════════════════════════════════════════════════════════════════════
    ...
    * DONE
    
    Status: 2 NOTEs
    See
      ‘/Users/dongjoon/APACHE/spark-merge/R/SparkR.Rcheck/00check.log’
    for details.
    
    + popd
    Tests passed.
    ```
    
    Closes #41680 from dongjoon-hyun/SPARK-44125.
    
    Authored-by: Dongjoon Hyun <do...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 R/pkg/R/client.R                            |  6 ++++--
 R/pkg/tests/fulltests/test_sparkSQL_arrow.R | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R
index 797a5c7da15..88f9e9fe857 100644
--- a/R/pkg/R/client.R
+++ b/R/pkg/R/client.R
@@ -93,8 +93,10 @@ checkJavaVersion <- function() {
       }, javaVersionOut)
 
   javaVersionStr <- strsplit(javaVersionFilter[[1]], '"', fixed = TRUE)[[1L]][2]
-  # javaVersionStr is of the form 1.8.0_92/9.0.x/11.0.x.
-  # We are using 8, 9, 10, 11 for sparkJavaVersion.
+  # javaVersionStr is of the form 1.8.0_92/11.0.x/17.0.x/21-ea/21.
+  # We are using 8, 11, 17, and 21 for sparkJavaVersion.
+  javaVersionStr <- strsplit(javaVersionStr, "-ea", fixed = TRUE)[[1L]]
+
   versions <- strsplit(javaVersionStr, ".", fixed = TRUE)[[1L]]
   if ("1" == versions[1]) {
     javaVersionNum <- as.integer(versions[2])
diff --git a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
index 1ec64077b48..4c3272f9034 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R
@@ -26,6 +26,8 @@ sparkSession <- sparkR.session(
 
 test_that("createDataFrame/collect Arrow optimization", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
 
   conf <- callJMethod(sparkSession, "conf")
   arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]]
@@ -43,12 +45,16 @@ test_that("createDataFrame/collect Arrow optimization", {
 
 test_that("createDataFrame/collect Arrow optimization - many partitions (partition order test)", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   expect_equal(collect(createDataFrame(mtcars, numPartitions = 32)),
                collect(createDataFrame(mtcars, numPartitions = 1)))
 })
 
 test_that("createDataFrame/collect Arrow optimization - type specification", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   rdf <- data.frame(list(list(a = 1,
                               b = "a",
                               c = TRUE,
@@ -73,6 +79,8 @@ test_that("createDataFrame/collect Arrow optimization - type specification", {
 
 test_that("dapply() Arrow optimization", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   df <- createDataFrame(mtcars)
 
   conf <- callJMethod(sparkSession, "conf")
@@ -107,6 +115,8 @@ test_that("dapply() Arrow optimization", {
 
 test_that("dapply() Arrow optimization - type specification", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   # Note that regular dapply() seems not supporting date and timestamps
   # whereas Arrow-optimized dapply() does.
   rdf <- data.frame(list(list(a = 1,
@@ -136,6 +146,8 @@ test_that("dapply() Arrow optimization - type specification", {
 
 test_that("dapply() Arrow optimization - type specification (date and timestamp)", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
                               b = as.POSIXct("1990-02-24 12:34:56"))))
   df <- createDataFrame(rdf)
@@ -145,6 +157,8 @@ test_that("dapply() Arrow optimization - type specification (date and timestamp)
 
 test_that("gapply() Arrow optimization", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   df <- createDataFrame(mtcars)
 
   conf <- callJMethod(sparkSession, "conf")
@@ -188,6 +202,8 @@ test_that("gapply() Arrow optimization", {
 
 test_that("gapply() Arrow optimization - type specification", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   # Note that regular gapply() seems not supporting date and timestamps
   # whereas Arrow-optimized gapply() does.
   rdf <- data.frame(list(list(a = 1,
@@ -220,6 +236,8 @@ test_that("gapply() Arrow optimization - type specification", {
 
 test_that("gapply() Arrow optimization - type specification (date and timestamp)", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
                               b = as.POSIXct("1990-02-24 12:34:56"))))
   df <- createDataFrame(rdf)
@@ -231,6 +249,8 @@ test_that("gapply() Arrow optimization - type specification (date and timestamp)
 
 test_that("Arrow optimization - unsupported types", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
 
   expect_error(checkSchemaInArrow(structType("a FLOAT")), "not support float type")
   expect_error(checkSchemaInArrow(structType("a BINARY")), "not support binary type")
@@ -242,6 +262,8 @@ test_that("Arrow optimization - unsupported types", {
 
 test_that("SPARK-32478: gapply() Arrow optimization - error message for schema mismatch", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
   df <- createDataFrame(list(list(a = 1L, b = "a")))
 
   expect_error(
@@ -251,6 +273,8 @@ test_that("SPARK-32478: gapply() Arrow optimization - error message for schema m
 
 test_that("SPARK-43789: Automatically pick the number of partitions based on Arrow batch size", {
   skip_if_not_installed("arrow")
+  # TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
+  skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
 
   conf <- callJMethod(sparkSession, "conf")
   maxRecordsPerBatch <- sparkR.conf("spark.sql.execution.arrow.maxRecordsPerBatch")[[1]]
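
For reference, a minimal standalone sketch of the version-parsing behavior that the `client.R` change implements; `parseJavaMajorVersion` is a hypothetical helper name used only for illustration, and the sample version strings come from the comment in the diff:

```
# Hypothetical standalone version of the parsing inside checkJavaVersion():
# drop any "-ea" (early-access) suffix, split on ".", and map legacy
# "1.x" strings such as "1.8.0_92" to their second component.
parseJavaMajorVersion <- function(javaVersionStr) {
  javaVersionStr <- strsplit(javaVersionStr, "-ea", fixed = TRUE)[[1L]]
  versions <- strsplit(javaVersionStr, ".", fixed = TRUE)[[1L]]
  if ("1" == versions[1]) {
    as.integer(versions[2])  # "1.8.0_92" -> 8
  } else {
    as.integer(versions[1])  # "11.0.19" -> 11, "21-ea" -> 21, "21" -> 21
  }
}

stopifnot(parseJavaMajorVersion("1.8.0_92") == 8L)
stopifnot(parseJavaMajorVersion("11.0.19") == 11L)
stopifnot(parseJavaMajorVersion("21-ea") == 21L)
stopifnot(parseJavaMajorVersion("21") == 21L)
```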

