You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ch...@apache.org on 2023/04/23 12:17:30 UTC
[kyuubi] branch master updated: [KYUUBI #4741] Kyuubi Spark Engine/TPC connectors support Spark 3.4
This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new ab1f67cb3 [KYUUBI #4741] Kyuubi Spark Engine/TPC connectors support Spark 3.4
ab1f67cb3 is described below
commit ab1f67cb3193e607d4d90fd19e2d747cfa3be6cc
Author: Cheng Pan <ch...@apache.org>
AuthorDate: Sun Apr 23 20:17:20 2023 +0800
[KYUUBI #4741] Kyuubi Spark Engine/TPC connectors support Spark 3.4
### _Why are the changes needed?_
- Add CI for Spark 3.4
- Kyuubi Spark TPC-DS/H connectors support Spark 3.4
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #4741 from pan3793/spark-3.4.
Closes #4741
84a2d6ad7 [Cheng Pan] log
b9b2ec1fb [Cheng Pan] Add spark-3.4 profile
Authored-by: Cheng Pan <ch...@apache.org>
Signed-off-by: Cheng Pan <ch...@apache.org>
---
.github/workflows/master.yml | 12 ++
.../spark/kyuubi-extension-spark-3-1/pom.xml | 13 +-
.../spark/kyuubi-extension-spark-3-2/pom.xml | 13 +-
.../spark/kyuubi-extension-spark-common/pom.xml | 13 +-
extensions/spark/kyuubi-spark-authz/README.md | 1 +
.../spark/kyuubi-spark-connector-common/pom.xml | 13 +-
.../spark/connector/tpcds/TPCDSCatalogSuite.scala | 3 +-
.../spark/connector/tpch/TPCHCatalogSuite.scala | 3 +-
extensions/spark/kyuubi-spark-lineage/README.md | 1 +
extensions/spark/kyuubi-spark-lineage/pom.xml | 6 +-
pom.xml | 139 ++++++++-------------
11 files changed, 122 insertions(+), 95 deletions(-)
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index f8febf9ed..aeb0a14bb 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -50,6 +50,7 @@ jobs:
- '3.1'
- '3.2'
- '3.3'
+ - '3.4'
spark-archive: [""]
exclude-tags: [""]
comment: ["normal"]
@@ -64,6 +65,11 @@ jobs:
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz'
exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
comment: 'verify-on-spark-3.2-binary'
+ - java: 8
+ spark: '3.3'
+ spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.4.0 -Dspark.archive.name=spark-3.4.0-bin-hadoop3.tgz'
+ exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
+ comment: 'verify-on-spark-3.4-binary'
env:
SPARK_LOCAL_IP: localhost
steps:
@@ -88,6 +94,12 @@ jobs:
- name: Build and test Kyuubi and Spark with maven w/o linters
run: |
TEST_MODULES="dev/kyuubi-codecov"
+ if [[ "${{ matrix.spark }}" == "3.4" ]]; then
+ # FIXME: Spark 3.4 supports authz plugin
+ TEST_MODULES="$TEST_MODULES,!extensions/spark/kyuubi-spark-authz"
+ # FIXME: Spark 3.4 supports lineage plugin
+ TEST_MODULES="$TEST_MODULES,!extensions/spark/kyuubi-spark-lineage"
+ fi
./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \
-Pspark-${{ matrix.spark }} ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }}
- name: Code coverage
diff --git a/extensions/spark/kyuubi-extension-spark-3-1/pom.xml b/extensions/spark/kyuubi-extension-spark-3-1/pom.xml
index 9f218f9d0..a36dffaef 100644
--- a/extensions/spark/kyuubi-extension-spark-3-1/pom.xml
+++ b/extensions/spark/kyuubi-extension-spark-3-1/pom.xml
@@ -125,10 +125,21 @@
<artifactId>jakarta.xml.bind-api</artifactId>
<scope>test</scope>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-1.2-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
-
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
diff --git a/extensions/spark/kyuubi-extension-spark-3-2/pom.xml b/extensions/spark/kyuubi-extension-spark-3-2/pom.xml
index a80040aca..3f8019fa9 100644
--- a/extensions/spark/kyuubi-extension-spark-3-2/pom.xml
+++ b/extensions/spark/kyuubi-extension-spark-3-2/pom.xml
@@ -125,10 +125,21 @@
<artifactId>jakarta.xml.bind-api</artifactId>
<scope>test</scope>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-1.2-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
-
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
diff --git a/extensions/spark/kyuubi-extension-spark-common/pom.xml b/extensions/spark/kyuubi-extension-spark-common/pom.xml
index 6d4bd1443..e11600408 100644
--- a/extensions/spark/kyuubi-extension-spark-common/pom.xml
+++ b/extensions/spark/kyuubi-extension-spark-common/pom.xml
@@ -110,10 +110,21 @@
<artifactId>jakarta.xml.bind-api</artifactId>
<scope>test</scope>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-1.2-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
-
<plugins>
<plugin>
<groupId>org.antlr</groupId>
diff --git a/extensions/spark/kyuubi-spark-authz/README.md b/extensions/spark/kyuubi-spark-authz/README.md
index eb3804a65..617a40a16 100644
--- a/extensions/spark/kyuubi-spark-authz/README.md
+++ b/extensions/spark/kyuubi-spark-authz/README.md
@@ -34,6 +34,7 @@ build/mvn clean package -pl :kyuubi-spark-authz_2.12 -Dspark.version=3.2.1 -Dran
`-Dspark.version=`
- [x] master
+- [ ] 3.4.x
- [x] 3.3.x (default)
- [x] 3.2.x
- [x] 3.1.x
diff --git a/extensions/spark/kyuubi-spark-connector-common/pom.xml b/extensions/spark/kyuubi-spark-connector-common/pom.xml
index 1cba0ccdd..e36361753 100644
--- a/extensions/spark/kyuubi-spark-connector-common/pom.xml
+++ b/extensions/spark/kyuubi-spark-connector-common/pom.xml
@@ -87,10 +87,21 @@
<artifactId>scalacheck-1-17_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-1.2-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
-
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
index 8a37d95e8..55a7fa3e9 100644
--- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala
@@ -170,7 +170,8 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
val exception = intercept[AnalysisException] {
spark.table("tpcds.sf1.nonexistent_table")
}
- assert(exception.message === "Table or view not found: tpcds.sf1.nonexistent_table")
+ assert(exception.message.contains("Table or view not found")
+ || exception.message.contains("TABLE_OR_VIEW_NOT_FOUND"))
}
}
}
diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
index ee817ecae..0fdfc2689 100644
--- a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHCatalogSuite.scala
@@ -158,7 +158,8 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
val exception = intercept[AnalysisException] {
spark.table("tpch.sf1.nonexistent_table")
}
- assert(exception.message === "Table or view not found: tpch.sf1.nonexistent_table")
+ assert(exception.message.contains("Table or view not found")
+ || exception.message.contains("TABLE_OR_VIEW_NOT_FOUND"))
}
}
}
diff --git a/extensions/spark/kyuubi-spark-lineage/README.md b/extensions/spark/kyuubi-spark-lineage/README.md
index 5365f2d77..a713221ac 100644
--- a/extensions/spark/kyuubi-spark-lineage/README.md
+++ b/extensions/spark/kyuubi-spark-lineage/README.md
@@ -34,6 +34,7 @@ build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -am -Dspark.version=3.2.1
`-Dspark.version=`
- [x] master
+- [ ] 3.4.x
- [x] 3.3.x (default)
- [x] 3.2.x
- [x] 3.1.x
diff --git a/extensions/spark/kyuubi-spark-lineage/pom.xml b/extensions/spark/kyuubi-spark-lineage/pom.xml
index bc13480d7..8583dfec0 100644
--- a/extensions/spark/kyuubi-spark-lineage/pom.xml
+++ b/extensions/spark/kyuubi-spark-lineage/pom.xml
@@ -85,10 +85,14 @@
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
-
<testResources>
<testResource>
<directory>${project.basedir}/src/test/resources</directory>
diff --git a/pom.xml b/pom.xml
index a6e5d2df5..ae6dc2e29 100644
--- a/pom.xml
+++ b/pom.xml
@@ -196,6 +196,8 @@
<swagger.version>2.2.1</swagger.version>
<swagger-ui.version>4.9.1</swagger-ui.version>
<testcontainers-scala.version>0.40.12</testcontainers-scala.version>
+ <!-- https://github.com/ThreeTen/threeten-extra/issues/226 -->
+ <threeten.version>1.7.0</threeten.version>
<thrift.version>0.9.3</thrift.version>
<trino.client.version>363</trino.client.version>
<trino.tpcds.version>1.4</trino.tpcds.version>
@@ -403,19 +405,15 @@
<dependency>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-repl_${scala.binary.version}</artifactId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
- <!--
- Use Hadoop Shaded Client to gain more clean transitive dependencies
- -->
+ <!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
- <!--
- Use log4j2
- -->
+ <!-- Use log4j2 -->
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
@@ -424,54 +422,51 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
+ <!-- SPARK-40511 upgrade SLF4J2, which is not compatible w/ SLF4J1 -->
+ <exclusion>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j2-impl</artifactId>
+ </exclusion>
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-repl_${scala.binary.version}</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-hive_${scala.binary.version}</artifactId>
+ <version>${spark.version}</version>
<exclusions>
- <!--
- Use Hadoop Shaded Client to gain more clean transitive dependencies
- -->
+ <!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- </exclusion>
- <!--
- Use log4j2
- -->
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
+ <artifactId>hadoop-common</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-hive_${scala.binary.version}</artifactId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
+ <type>test-jar</type>
<exclusions>
- <!--
- Use Hadoop Shaded Client to gain more clean transitive dependencies
- -->
+ <!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- </exclusion>
- <!--
- Use log4j2
- -->
+ <!-- Use log4j2 -->
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
@@ -480,26 +475,11 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_${scala.binary.version}</artifactId>
- <version>${spark.version}</version>
- <type>test-jar</type>
- <exclusions>
- <!--
- Use Hadoop Shaded Client to gain more clean transitive dependencies
- -->
+ <!-- SPARK-40511 upgrade SLF4J2, which is not compatible w/ SLF4J1 -->
<exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j2-impl</artifactId>
</exclusion>
- <!--
- The module is only used in Kyuubi Spark Extensions, we should respect
- the Spark bundled log4j.
- -->
</exclusions>
</dependency>
@@ -508,19 +488,6 @@
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<type>test-jar</type>
- <exclusions>
- <!--
- Use Hadoop Shaded Client to gain more clean transitive dependencies
- -->
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- </exclusion>
- <!--
- The module is only used in Kyuubi Spark Extensions, so we don't care about which
- version of Log4j it depends on.
- -->
- </exclusions>
</dependency>
<dependency>
@@ -528,19 +495,6 @@
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<type>test-jar</type>
- <exclusions>
- <!--
- Use Hadoop Shaded Client to gain more clean transitive dependencies
- -->
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- </exclusion>
- <!--
- The module is only used in Kyuubi Spark Extensions and Engine Spark SQL, so we
- don't care about which version of Log4j it depends on.
- -->
- </exclusions>
</dependency>
<dependency>
@@ -1524,6 +1478,12 @@
<artifactId>service</artifactId>
<version>${openai.java.version}</version>
</dependency>
+
+ <dependency>
+ <groupId>org.threeten</groupId>
+ <artifactId>threeten-extra</artifactId>
+ <version>${threeten.version}</version>
+ </dependency>
</dependencies>
</dependencyManagement>
@@ -2210,23 +2170,26 @@
</properties>
</profile>
+ <profile>
+ <id>spark-3.4</id>
+ <modules>
+ <module>extensions/spark/kyuubi-spark-connector-hive</module>
+ <module>extensions/spark/kyuubi-spark-connector-kudu</module>
+ </modules>
+ <properties>
+ <spark.version>3.4.0</spark.version>
+ <!-- FIXME: used for constructing Iceberg artifact name, correct it once Iceberg supports Spark 3.4 -->
+ <spark.binary.version>3.3</spark.binary.version>
+ <maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.IcebergTest</maven.plugin.scalatest.exclude.tags>
+ </properties>
+ </profile>
+
<profile>
<id>spark-master</id>
<properties>
<spark.version>3.5.0-SNAPSHOT</spark.version>
- <!-- https://github.com/ThreeTen/threeten-extra/issues/226 -->
- <threeten.version>1.7.0</threeten.version>
<maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PySparkTest</maven.plugin.scalatest.exclude.tags>
</properties>
- <dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>org.threeten</groupId>
- <artifactId>threeten-extra</artifactId>
- <version>${threeten.version}</version>
- </dependency>
- </dependencies>
- </dependencyManagement>
<repositories>
<repository>
<releases>