Posted to commits@kyuubi.apache.org by ch...@apache.org on 2023/04/23 12:17:30 UTC

[kyuubi] branch master updated: [KYUUBI #4741] Kyuubi Spark Engine/TPC connectors support Spark 3.4

This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new ab1f67cb3 [KYUUBI #4741] Kyuubi Spark Engine/TPC connectors support Spark 3.4
ab1f67cb3 is described below

commit ab1f67cb3193e607d4d90fd19e2d747cfa3be6cc
Author: Cheng Pan <ch...@apache.org>
AuthorDate: Sun Apr 23 20:17:20 2023 +0800

    [KYUUBI #4741] Kyuubi Spark Engine/TPC connectors support Spark 3.4
    
    ### _Why are the changes needed?_
    
    - Add CI for Spark 3.4
    - Kyuubi Spark TPC-DS/H connectors support Spark 3.4
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly, including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [x] [Run tests](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #4741 from pan3793/spark-3.4.
    
    Closes #4741
    
    84a2d6ad7 [Cheng Pan] log
    b9b2ec1fb [Cheng Pan] Add spark-3.4 profile
    
    Authored-by: Cheng Pan <ch...@apache.org>
    Signed-off-by: Cheng Pan <ch...@apache.org>
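
For context, the `spark-3.4` profile added by this commit can be exercised locally with the project's Maven wrapper. A minimal sketch, assuming a full checkout; the module list and flags mirror the CI job in the diff below:

    # build everything against Spark 3.4 via the new profile
    ./build/mvn clean install -DskipTests -Pspark-3.4

    # build and test only the TPC-DS/TPC-H connectors and their dependencies
    ./build/mvn clean install \
      -pl extensions/spark/kyuubi-spark-connector-tpcds,extensions/spark/kyuubi-spark-connector-tpch \
      -am -Pspark-3.4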
---
 .github/workflows/master.yml                       |  12 ++
 .../spark/kyuubi-extension-spark-3-1/pom.xml       |  13 +-
 .../spark/kyuubi-extension-spark-3-2/pom.xml       |  13 +-
 .../spark/kyuubi-extension-spark-common/pom.xml    |  13 +-
 extensions/spark/kyuubi-spark-authz/README.md      |   1 +
 .../spark/kyuubi-spark-connector-common/pom.xml    |  13 +-
 .../spark/connector/tpcds/TPCDSCatalogSuite.scala  |   3 +-
 .../spark/connector/tpch/TPCHCatalogSuite.scala    |   3 +-
 extensions/spark/kyuubi-spark-lineage/README.md    |   1 +
 extensions/spark/kyuubi-spark-lineage/pom.xml      |   6 +-
 pom.xml                                            | 139 ++++++++-------------
 11 files changed, 122 insertions(+), 95 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index f8febf9ed..aeb0a14bb 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -50,6 +50,7 @@ jobs:
           - '3.1'
           - '3.2'
           - '3.3'
+          - '3.4'
         spark-archive: [""]
         exclude-tags: [""]
         comment: ["normal"]
@@ -64,6 +65,11 @@ jobs:
             spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz'
             exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
             comment: 'verify-on-spark-3.2-binary'
+          - java: 8
+            spark: '3.3'
+            spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.4.0 -Dspark.archive.name=spark-3.4.0-bin-hadoop3.tgz'
+            exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
+            comment: 'verify-on-spark-3.4-binary'
     env:
       SPARK_LOCAL_IP: localhost
     steps:
@@ -88,6 +94,12 @@ jobs:
       - name: Build and test Kyuubi and Spark with maven w/o linters
         run: |
           TEST_MODULES="dev/kyuubi-codecov"
+          if [[ "${{ matrix.spark }}" == "3.4" ]]; then
+            # FIXME: Spark 3.4 supports authz plugin
+            TEST_MODULES="$TEST_MODULES,!extensions/spark/kyuubi-spark-authz"
+            # FIXME: Spark 3.4 supports lineage plugin
+            TEST_MODULES="$TEST_MODULES,!extensions/spark/kyuubi-spark-lineage"
+          fi
           ./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \
           -Pspark-${{ matrix.spark }} ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }}
       - name: Code coverage
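
In the `-pl` list above, a leading `!` deselects a module from the Maven reactor (supported since Maven 3.2.1); this is how the authz and lineage plugins are skipped until they support Spark 3.4. A minimal sketch of the expanded invocation the workflow produces for the Spark 3.4 job:

    TEST_MODULES="dev/kyuubi-codecov,!extensions/spark/kyuubi-spark-authz,!extensions/spark/kyuubi-spark-lineage"
    ./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am -Pspark-3.4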
diff --git a/extensions/spark/kyuubi-extension-spark-3-1/pom.xml b/extensions/spark/kyuubi-extension-spark-3-1/pom.xml
index 9f218f9d0..a36dffaef 100644
--- a/extensions/spark/kyuubi-extension-spark-3-1/pom.xml
+++ b/extensions/spark/kyuubi-extension-spark-3-1/pom.xml
@@ -125,10 +125,21 @@
             <artifactId>jakarta.xml.bind-api</artifactId>
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-1.2-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>
-
         <plugins>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
diff --git a/extensions/spark/kyuubi-extension-spark-3-2/pom.xml b/extensions/spark/kyuubi-extension-spark-3-2/pom.xml
index a80040aca..3f8019fa9 100644
--- a/extensions/spark/kyuubi-extension-spark-3-2/pom.xml
+++ b/extensions/spark/kyuubi-extension-spark-3-2/pom.xml
@@ -125,10 +125,21 @@
             <artifactId>jakarta.xml.bind-api</artifactId>
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-1.2-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>
-
         <plugins>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
diff --git a/extensions/spark/kyuubi-extension-spark-common/pom.xml b/extensions/spark/kyuubi-extension-spark-common/pom.xml
index 6d4bd1443..e11600408 100644
--- a/extensions/spark/kyuubi-extension-spark-common/pom.xml
+++ b/extensions/spark/kyuubi-extension-spark-common/pom.xml
@@ -110,10 +110,21 @@
             <artifactId>jakarta.xml.bind-api</artifactId>
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-1.2-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>
-
         <plugins>
             <plugin>
                 <groupId>org.antlr</groupId>
diff --git a/extensions/spark/kyuubi-spark-authz/README.md b/extensions/spark/kyuubi-spark-authz/README.md
index eb3804a65..617a40a16 100644
--- a/extensions/spark/kyuubi-spark-authz/README.md
+++ b/extensions/spark/kyuubi-spark-authz/README.md
@@ -34,6 +34,7 @@ build/mvn clean package -pl :kyuubi-spark-authz_2.12 -Dspark.version=3.2.1 -Dran
 `-Dspark.version=`
 
 - [x] master
+- [ ] 3.4.x
 - [x] 3.3.x (default)
 - [x] 3.2.x
 - [x] 3.1.x
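
The unchecked 3.4.x box matches the CI change above, which excludes the authz module when building with `-Pspark-3.4`. The per-version build the README documents still applies to the checked lines; a minimal sketch (the 3.4.0 invocation is expected to fail until the FIXME in master.yml is resolved):

    # supported: builds against a Spark 3.2 line, per the README
    build/mvn clean package -pl :kyuubi-spark-authz_2.12 -Dspark.version=3.2.1

    # not yet supported as of this commit
    build/mvn clean package -pl :kyuubi-spark-authz_2.12 -Dspark.version=3.4.0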
diff --git a/extensions/spark/kyuubi-spark-connector-common/pom.xml b/extensions/spark/kyuubi-spark-connector-common/pom.xml
index 1cba0ccdd..e36361753 100644
--- a/extensions/spark/kyuubi-spark-connector-common/pom.xml
+++ b/extensions/spark/kyuubi-spark-connector-common/pom.xml
@@ -87,10 +87,21 @@
             <artifactId>scalacheck-1-17_${scala.binary.version}</artifactId>
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-1.2-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>
-
         <plugins>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
index 8a37d95e8..55a7fa3e9 100644
--- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
@@ -170,7 +170,8 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
       val exception = intercept[AnalysisException] {
         spark.table("tpcds.sf1.nonexistent_table")
       }
-      assert(exception.message === "Table or view not found: tpcds.sf1.nonexistent_table")
+      assert(exception.message.contains("Table or view not found")
+        || exception.message.contains("TABLE_OR_VIEW_NOT_FOUND"))
     }
   }
 }
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
index ee817ecae..0fdfc2689 100644
--- a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
@@ -158,7 +158,8 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
       val exception = intercept[AnalysisException] {
         spark.table("tpch.sf1.nonexistent_table")
       }
-      assert(exception.message === "Table or view not found: tpch.sf1.nonexistent_table")
+      assert(exception.message.contains("Table or view not found")
+        || exception.message.contains("TABLE_OR_VIEW_NOT_FOUND"))
     }
   }
 }
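
The relaxed assertions here and in TPCDSCatalogSuite account for Spark 3.4 moving analyzer errors onto its error-class framework: the message for a missing table now carries the TABLE_OR_VIEW_NOT_FOUND error class instead of the legacy "Table or view not found: ..." prefix, so an exact-match assertion can no longer hold across both Spark lines. A rough sketch of the difference as seen from a plain spark-sql shell (output abbreviated; exact wording varies by version):

    # Spark 3.3 and earlier (legacy message)
    $ spark-sql -e "SELECT * FROM nonexistent_table"
    Error in query: Table or view not found: nonexistent_table; ...

    # Spark 3.4 (error-class message)
    $ spark-sql -e "SELECT * FROM nonexistent_table"
    [TABLE_OR_VIEW_NOT_FOUND] The table or view `nonexistent_table` cannot be found. ...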
diff --git a/extensions/spark/kyuubi-spark-lineage/README.md b/extensions/spark/kyuubi-spark-lineage/README.md
index 5365f2d77..a713221ac 100644
--- a/extensions/spark/kyuubi-spark-lineage/README.md
+++ b/extensions/spark/kyuubi-spark-lineage/README.md
@@ -34,6 +34,7 @@ build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -am -Dspark.version=3.2.1
 `-Dspark.version=`
 
 - [x] master
+- [ ] 3.4.x
 - [x] 3.3.x (default)
 - [x] 3.2.x
 - [x] 3.1.x
diff --git a/extensions/spark/kyuubi-spark-lineage/pom.xml b/extensions/spark/kyuubi-spark-lineage/pom.xml
index bc13480d7..8583dfec0 100644
--- a/extensions/spark/kyuubi-spark-lineage/pom.xml
+++ b/extensions/spark/kyuubi-spark-lineage/pom.xml
@@ -85,10 +85,14 @@
             <scope>test</scope>
         </dependency>
 
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 
     <build>
-
         <testResources>
             <testResource>
                 <directory>${project.basedir}/src/test/resources</directory>
diff --git a/pom.xml b/pom.xml
index a6e5d2df5..ae6dc2e29 100644
--- a/pom.xml
+++ b/pom.xml
@@ -196,6 +196,8 @@
         <swagger.version>2.2.1</swagger.version>
         <swagger-ui.version>4.9.1</swagger-ui.version>
         <testcontainers-scala.version>0.40.12</testcontainers-scala.version>
+        <!-- https://github.com/ThreeTen/threeten-extra/issues/226 -->
+        <threeten.version>1.7.0</threeten.version>
         <thrift.version>0.9.3</thrift.version>
         <trino.client.version>363</trino.client.version>
         <trino.tpcds.version>1.4</trino.tpcds.version>
@@ -403,19 +405,15 @@
 
             <dependency>
                 <groupId>org.apache.spark</groupId>
-                <artifactId>spark-repl_${scala.binary.version}</artifactId>
+                <artifactId>spark-core_${scala.binary.version}</artifactId>
                 <version>${spark.version}</version>
                 <exclusions>
-                    <!--
-                      Use Hadoop Shaded Client to gain more clean transitive dependencies
-                     -->
+                    <!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
                     <exclusion>
                         <groupId>org.apache.hadoop</groupId>
                         <artifactId>hadoop-client</artifactId>
                     </exclusion>
-                    <!--
-                      Use log4j2
-                     -->
+                    <!--  Use log4j2 -->
                     <exclusion>
                         <groupId>log4j</groupId>
                         <artifactId>log4j</artifactId>
@@ -424,54 +422,51 @@
                         <groupId>org.slf4j</groupId>
                         <artifactId>slf4j-log4j12</artifactId>
                     </exclusion>
+                    <!-- SPARK-40511 upgrade SLF4J2, which is not compatible w/ SLF4J1 -->
+                    <exclusion>
+                        <groupId>org.apache.logging.log4j</groupId>
+                        <artifactId>log4j-slf4j2-impl</artifactId>
+                    </exclusion>
                 </exclusions>
             </dependency>
 
+            <dependency>
+                <groupId>org.apache.spark</groupId>
+                <artifactId>spark-repl_${scala.binary.version}</artifactId>
+                <version>${spark.version}</version>
+            </dependency>
+
             <dependency>
                 <groupId>org.apache.spark</groupId>
                 <artifactId>spark-sql_${scala.binary.version}</artifactId>
                 <version>${spark.version}</version>
+            </dependency>
+
+            <dependency>
+                <groupId>org.apache.spark</groupId>
+                <artifactId>spark-hive_${scala.binary.version}</artifactId>
+                <version>${spark.version}</version>
                 <exclusions>
-                    <!--
-                      Use Hadoop Shaded Client to gain more clean transitive dependencies
-                     -->
+                    <!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
                     <exclusion>
                         <groupId>org.apache.hadoop</groupId>
-                        <artifactId>hadoop-client</artifactId>
-                    </exclusion>
-                    <!--
-                      Use log4j2
-                     -->
-                    <exclusion>
-                        <groupId>log4j</groupId>
-                        <artifactId>log4j</artifactId>
-                    </exclusion>
-                    <exclusion>
-                        <groupId>org.slf4j</groupId>
-                        <artifactId>slf4j-log4j12</artifactId>
+                        <artifactId>hadoop-common</artifactId>
                     </exclusion>
                 </exclusions>
             </dependency>
 
             <dependency>
                 <groupId>org.apache.spark</groupId>
-                <artifactId>spark-hive_${scala.binary.version}</artifactId>
+                <artifactId>spark-core_${scala.binary.version}</artifactId>
                 <version>${spark.version}</version>
+                <type>test-jar</type>
                 <exclusions>
-                    <!--
-                      Use Hadoop Shaded Client to gain more clean transitive dependencies
-                     -->
+                    <!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
                     <exclusion>
                         <groupId>org.apache.hadoop</groupId>
                         <artifactId>hadoop-client</artifactId>
                     </exclusion>
-                    <exclusion>
-                        <groupId>org.apache.hadoop</groupId>
-                        <artifactId>hadoop-common</artifactId>
-                    </exclusion>
-                    <!--
-                      Use log4j2
-                     -->
+                    <!--  Use log4j2 -->
                     <exclusion>
                         <groupId>log4j</groupId>
                         <artifactId>log4j</artifactId>
@@ -480,26 +475,11 @@
                         <groupId>org.slf4j</groupId>
                         <artifactId>slf4j-log4j12</artifactId>
                     </exclusion>
-                </exclusions>
-            </dependency>
-
-            <dependency>
-                <groupId>org.apache.spark</groupId>
-                <artifactId>spark-core_${scala.binary.version}</artifactId>
-                <version>${spark.version}</version>
-                <type>test-jar</type>
-                <exclusions>
-                    <!--
-                      Use Hadoop Shaded Client to gain more clean transitive dependencies
-                     -->
+                    <!-- SPARK-40511 upgrade SLF4J2, which is not compatible w/ SLF4J1 -->
                     <exclusion>
-                        <groupId>org.apache.hadoop</groupId>
-                        <artifactId>hadoop-client</artifactId>
+                        <groupId>org.apache.logging.log4j</groupId>
+                        <artifactId>log4j-slf4j2-impl</artifactId>
                     </exclusion>
-                    <!--
-                      The module is only used in Kyuubi Spark Extensions, we should respect
-                      the Spark bundled log4j.
-                     -->
                 </exclusions>
             </dependency>
 
@@ -508,19 +488,6 @@
                 <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
                 <version>${spark.version}</version>
                 <type>test-jar</type>
-                <exclusions>
-                    <!--
-                      Use Hadoop Shaded Client to gain more clean transitive dependencies
-                     -->
-                    <exclusion>
-                        <groupId>org.apache.hadoop</groupId>
-                        <artifactId>hadoop-client</artifactId>
-                    </exclusion>
-                    <!--
-                      The module is only used in Kyuubi Spark Extensions, so we don't care about which
-                      version of Log4j it depends on.
-                     -->
-                </exclusions>
             </dependency>
 
             <dependency>
@@ -528,19 +495,6 @@
                 <artifactId>spark-sql_${scala.binary.version}</artifactId>
                 <version>${spark.version}</version>
                 <type>test-jar</type>
-                <exclusions>
-                    <!--
-                      Use Hadoop Shaded Client to gain more clean transitive dependencies
-                     -->
-                    <exclusion>
-                        <groupId>org.apache.hadoop</groupId>
-                        <artifactId>hadoop-client</artifactId>
-                    </exclusion>
-                    <!--
-                      The module is only used in Kyuubi Spark Extensions and Engine Spark SQL, so we
-                      don't care about which version of Log4j it depends on.
-                     -->
-                </exclusions>
             </dependency>
 
             <dependency>
@@ -1524,6 +1478,12 @@
                 <artifactId>service</artifactId>
                 <version>${openai.java.version}</version>
             </dependency>
+
+            <dependency>
+                <groupId>org.threeten</groupId>
+                <artifactId>threeten-extra</artifactId>
+                <version>${threeten.version}</version>
+            </dependency>
         </dependencies>
     </dependencyManagement>
 
@@ -2210,23 +2170,26 @@
             </properties>
         </profile>
 
+        <profile>
+            <id>spark-3.4</id>
+            <modules>
+                <module>extensions/spark/kyuubi-spark-connector-hive</module>
+                <module>extensions/spark/kyuubi-spark-connector-kudu</module>
+            </modules>
+            <properties>
+                <spark.version>3.4.0</spark.version>
+                <!-- FIXME: used for constructing Iceberg artifact name, correct it once Iceberg supports Spark 3.4 -->
+                <spark.binary.version>3.3</spark.binary.version>
+                <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.IcebergTest</maven.plugin.scalatest.exclude.tags>
+            </properties>
+        </profile>
+
         <profile>
             <id>spark-master</id>
             <properties>
                 <spark.version>3.5.0-SNAPSHOT</spark.version>
-                <!-- https://github.com/ThreeTen/threeten-extra/issues/226 -->
-                <threeten.version>1.7.0</threeten.version>
                 <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PySparkTest</maven.plugin.scalatest.exclude.tags>
             </properties>
-            <dependencyManagement>
-                <dependencies>
-                    <dependency>
-                        <groupId>org.threeten</groupId>
-                        <artifactId>threeten-extra</artifactId>
-                        <version>${threeten.version}</version>
-                    </dependency>
-                </dependencies>
-            </dependencyManagement>
             <repositories>
                 <repository>
                     <releases>