You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2023/02/01 18:10:45 UTC
[spark] branch master updated: [SPARK-42228][BUILD][CONNECT] Add shade and relocation rule of grpc to connect-client-jvm module
This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 48ab301dad5 [SPARK-42228][BUILD][CONNECT] Add shade and relocation rule of grpc to connect-client-jvm module
48ab301dad5 is described below
commit 48ab301dad5d632c0578bd48fcd6fb5b1173fbed
Author: yangjie01 <ya...@baidu.com>
AuthorDate: Wed Feb 1 14:10:30 2023 -0400
[SPARK-42228][BUILD][CONNECT] Add shade and relocation rule of grpc to connect-client-jvm module
### What changes were proposed in this pull request?
When I try to run an E2E test for the Java connect client and connect server outside of the `connect-client-jvm` module — for example, by moving `ClientE2ETestSuite` into a separate module and running the Maven tests — I found the following errors:
```
ClientE2ETestSuite:
Starting the Spark Connect Server...
Using jar: /${basedir}/spark-mine/connector/connect/server/target/spark-connect_2.12-3.5.0-SNAPSHOT.jar
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://localhost:4040
Spark context available as 'sc' (master = local[*], app id = local-1674980902694).
Spark session available as 'spark'.
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 3.5.0-SNAPSHOT
/_/
Using Scala version 2.12.17 (OpenJDK 64-Bit Server VM, Java 1.8.0_352)
Type in expressions to have them evaluated.
Type :help for more information.
java.lang.RuntimeException: Failed to start the test server on port 15290.
at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:158)
at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll$(RemoteSparkSession.scala:149)
at org.apache.spark.sql.ClientE2ETestSuite.beforeAll(ClientE2ETestSuite.scala:22)
at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:212)
at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
at org.apache.spark.sql.ClientE2ETestSuite.run(ClientE2ETestSuite.scala:22)
at org.scalatest.Suite.callExecuteOnSuite$1(Suite.scala:1178)
at org.scalatest.Suite.$anonfun$runNestedSuites$1(Suite.scala:1225)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at org.scalatest.Suite.runNestedSuites(Suite.scala:1223)
at org.scalatest.Suite.runNestedSuites$(Suite.scala:1156)
at org.scalatest.tools.DiscoverySuite.runNestedSuites(DiscoverySuite.scala:30)
at org.scalatest.Suite.run(Suite.scala:1111)
at org.scalatest.Suite.run$(Suite.scala:1096)
at org.scalatest.tools.DiscoverySuite.run(DiscoverySuite.scala:30)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:47)
at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1321)
at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1315)
at scala.collection.immutable.List.foreach(List.scala:431)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1315)
at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:992)
at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:970)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1481)
org.apache.spark.sql.ClientE2ETestSuite *** ABORTED ***
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:970)
java.lang.RuntimeException: Failed to start the test server on port 15290.
at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:158)
at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll$(RemoteSparkSession.scala:149)
at org.apache.spark.sql.ClientE2ETestSuite.beforeAll(ClientE2ETestSuite.scala:22)
at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:212)
at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
at org.apache.spark.sql.ClientE2ETestSuite.run(ClientE2ETestSuite.scala:22)
at org.scalatest.Suite.callExecuteOnSuite$1(Suite.scala:1178)
at org.scalatest.Suite.$anonfun$runNestedSuites$1(Suite.scala:1225)
at org.scalatest.tools.Runner$.main(Runner.scala:775)
at org.scalatest.tools.Runner.main(Runner.scala)
Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:73)
at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:164)
... 28 more
...
Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:73)
at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:164)
... 28 more
Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:73)
at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:164)
... 28 more
Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
Run completed in 1 minute, 3 seconds.
at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
```
The key error message is `java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;`
The reason for the failure is that when we package the `connect-client-jvm` module, we relocate the code paths related to protobuf, but the corresponding API references in grpc are not changed accordingly, so when we test the `connect-client-jvm` shaded jar, the above NoSuchMethodError exception is thrown.
So this PR adds the shade and relocation rules for grpc to the `connect-client-jvm` module, for both Maven and SBT. After this change, the above test runs successfully.
### Why are the changes needed?
Make `connect-client-jvm` shaded jar usable.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Pass GitHub Actions and manual testing as described above.
Closes #39789 from LuciferYang/SPARK-42228.
Authored-by: yangjie01 <ya...@baidu.com>
Signed-off-by: Herman van Hovell <he...@databricks.com>
---
connector/connect/client/jvm/pom.xml | 12 ++++++++++++
project/SparkBuild.scala | 1 +
2 files changed, 13 insertions(+)
diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
index 44c653462fa..e1f5cf0b41f 100644
--- a/connector/connect/client/jvm/pom.xml
+++ b/connector/connect/client/jvm/pom.xml
@@ -105,11 +105,19 @@
<artifactSet>
<includes>
<include>com.google.guava:*</include>
+ <include>io.grpc:*</include>
<include>com.google.protobuf:*</include>
<include>org.apache.spark:spark-connect-common_${scala.binary.version}</include>
</includes>
</artifactSet>
<relocations>
+ <relocation>
+ <pattern>io.grpc</pattern>
+ <shadedPattern>${spark.shade.packageName}.connect.client.grpc</shadedPattern>
+ <includes>
+ <include>io.grpc.**</include>
+ </includes>
+ </relocation>
<relocation>
<pattern>com.google.protobuf</pattern>
<shadedPattern>${spark.shade.packageName}.connect.protobuf</shadedPattern>
@@ -132,6 +140,10 @@
</includes>
</relocation>
</relocations>
+ <!--SPARK-42228: Add `ServicesResourceTransformer` to relocation class names in META-INF/services for grpc-->
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ </transformers>
</configuration>
</plugin>
</plugins>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 9f478d649ba..9579123c25c 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -870,6 +870,7 @@ object SparkConnectClient {
},
(assembly / assemblyShadeRules) := Seq(
+ ShadeRule.rename("io.grpc.**" -> "org.sparkproject.connect.client.grpc.@0").inAll,
ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.connect.protobuf.@1").inAll,
ShadeRule.rename("com.google.common.**" -> "org.sparkproject.connect.client.guava.@1").inAll,
ShadeRule.rename("com.google.thirdparty.**" -> "org.sparkproject.connect.client.guava.@1").inAll,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org