You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2023/02/01 18:10:57 UTC

[spark] branch branch-3.4 updated: [SPARK-42228][BUILD][CONNECT] Add shade and relocation rule of grpc to connect-client-jvm module

This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new a3a0a734074 [SPARK-42228][BUILD][CONNECT] Add shade and relocation rule of grpc to connect-client-jvm module
a3a0a734074 is described below

commit a3a0a7340747c06e9cd98b2bf3d36985b66faae9
Author: yangjie01 <ya...@baidu.com>
AuthorDate: Wed Feb 1 14:10:30 2023 -0400

    [SPARK-42228][BUILD][CONNECT] Add shade and relocation rule of grpc to connect-client-jvm module
    
    ### What changes were proposed in this pull request?
    When I try to do an E2E test for the Java connect client and connect server outside of the `connect-client-jvm` module, for example, just move `ClientE2ETestSuite` into a separate module and run maven test, then I found the following errors:
    
    ```
    ClientE2ETestSuite:
    Starting the Spark Connect Server...
    Using jar: /${basedir}/spark-mine/connector/connect/server/target/spark-connect_2.12-3.5.0-SNAPSHOT.jar
    Setting default log level to "WARN".
    To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
    Spark context Web UI available at http://localhost:4040
    Spark context available as 'sc' (master = local[*], app id = local-1674980902694).
    Spark session available as 'spark'.
    Welcome to
          ____              __
         / __/__  ___ _____/ /__
        _\ \/ _ \/ _ `/ __/  '_/
       /___/ .__/\_,_/_/ /_/\_\   version 3.5.0-SNAPSHOT
          /_/
    
    Using Scala version 2.12.17 (OpenJDK 64-Bit Server VM, Java 1.8.0_352)
    Type in expressions to have them evaluated.
    Type :help for more information.
    
    java.lang.RuntimeException: Failed to start the test server on port 15290.
            at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:158)
            at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll$(RemoteSparkSession.scala:149)
            at org.apache.spark.sql.ClientE2ETestSuite.beforeAll(ClientE2ETestSuite.scala:22)
            at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:212)
            at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
            at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
            at org.apache.spark.sql.ClientE2ETestSuite.run(ClientE2ETestSuite.scala:22)
            at org.scalatest.Suite.callExecuteOnSuite$1(Suite.scala:1178)
            at org.scalatest.Suite.$anonfun$runNestedSuites$1(Suite.scala:1225)
            at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
            at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
            at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
            at org.scalatest.Suite.runNestedSuites(Suite.scala:1223)
            at org.scalatest.Suite.runNestedSuites$(Suite.scala:1156)
            at org.scalatest.tools.DiscoverySuite.runNestedSuites(DiscoverySuite.scala:30)
            at org.scalatest.Suite.run(Suite.scala:1111)
            at org.scalatest.Suite.run$(Suite.scala:1096)
            at org.scalatest.tools.DiscoverySuite.run(DiscoverySuite.scala:30)
            at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:47)
            at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1321)
            at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1315)
            at scala.collection.immutable.List.foreach(List.scala:431)
            at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1315)
            at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:992)
            at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:970)
            at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1481)
    org.apache.spark.sql.ClientE2ETestSuite *** ABORTED ***
            at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:970)
      java.lang.RuntimeException: Failed to start the test server on port 15290.
      at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:158)
      at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll$(RemoteSparkSession.scala:149)
      at org.apache.spark.sql.ClientE2ETestSuite.beforeAll(ClientE2ETestSuite.scala:22)
      at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:212)
      at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
      at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
      at org.apache.spark.sql.ClientE2ETestSuite.run(ClientE2ETestSuite.scala:22)
      at org.scalatest.Suite.callExecuteOnSuite$1(Suite.scala:1178)
      at org.scalatest.Suite.$anonfun$runNestedSuites$1(Suite.scala:1225)
            at org.scalatest.tools.Runner$.main(Runner.scala:775)
            at org.scalatest.tools.Runner.main(Runner.scala)
            Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
                    at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
                    at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
      at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
                    at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:73)
                    at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:164)
                    ... 28 more
      ...
            Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
                    at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
                    at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
                    at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:73)
                    at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:164)
                    ... 28 more
            Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
                    at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
                    at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
                    at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:73)
                    at org.apache.spark.sql.connect.client.util.RemoteSparkSession.beforeAll(RemoteSparkSession.scala:164)
                    ... 28 more
            Suppressed: java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc.getExecutePlanMethod(SparkConnectServiceGrpc.java:40)
                    at org.apache.spark.connect.proto.SparkConnectServiceGrpc$SparkConnectServiceBlockingStub.executePlan(SparkConnectServiceGrpc.java:242)
                    at org.apache.spark.sql.connect.client.SparkConnectClient.execute(SparkConnectClient.scala:64)
    Run completed in 1 minute, 3 seconds.
                    at org.apache.spark.sql.SparkSession.execute(SparkSession.scala:119)
    ```
    
    The key error message is `java.lang.NoSuchMethodError: io.grpc.protobuf.ProtoUtils.marshaller(Lorg/sparkproject/connect/protobuf/Message;)Lio/grpc/MethodDescriptor$Marshaller;`
    
    The reason for the failure is that when we package the `connect-client-jvm` module, we relocate the code path related to protobuf, but the relevant API in gRPC has not been changed, so when we test the `connect-client-jvm` shaded jar, it will throw the above NoSuchMethodError exception.
    
    So this PR adds the shade and relocation rule of gRPC to the `connect-client-jvm` module, for both Maven and SBT. After this change, the above test can run successfully.
    
    ### Why are the changes needed?
    Make `connect-client-jvm` shaded jar usable.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Pass GitHub Actions and manual test as described above
    
    Closes #39789 from LuciferYang/SPARK-42228.
    
    Authored-by: yangjie01 <ya...@baidu.com>
    Signed-off-by: Herman van Hovell <he...@databricks.com>
    (cherry picked from commit 48ab301dad5d632c0578bd48fcd6fb5b1173fbed)
    Signed-off-by: Herman van Hovell <he...@databricks.com>
---
 connector/connect/client/jvm/pom.xml | 12 ++++++++++++
 project/SparkBuild.scala             |  1 +
 2 files changed, 13 insertions(+)

diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
index 6113fab1ba6..94e49033858 100644
--- a/connector/connect/client/jvm/pom.xml
+++ b/connector/connect/client/jvm/pom.xml
@@ -105,11 +105,19 @@
           <artifactSet>
             <includes>
               <include>com.google.guava:*</include>
+              <include>io.grpc:*</include>
               <include>com.google.protobuf:*</include>
               <include>org.apache.spark:spark-connect-common_${scala.binary.version}</include>
             </includes>
           </artifactSet>
           <relocations>
+            <relocation>
+              <pattern>io.grpc</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.grpc</shadedPattern>
+              <includes>
+                <include>io.grpc.**</include>
+              </includes>
+            </relocation>
             <relocation>
               <pattern>com.google.protobuf</pattern>
               <shadedPattern>${spark.shade.packageName}.connect.protobuf</shadedPattern>
@@ -132,6 +140,10 @@
               </includes>
             </relocation>
           </relocations>
+          <!--SPARK-42228: Add `ServicesResourceTransformer` to relocation class names in META-INF/services for grpc-->
+          <transformers>
+            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+          </transformers>
         </configuration>
       </plugin>
     </plugins>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 9f478d649ba..9579123c25c 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -870,6 +870,7 @@ object SparkConnectClient {
     },
 
     (assembly / assemblyShadeRules) := Seq(
+      ShadeRule.rename("io.grpc.**" -> "org.sparkproject.connect.client.grpc.@0").inAll,
       ShadeRule.rename("com.google.protobuf.**" -> "org.sparkproject.connect.protobuf.@1").inAll,
       ShadeRule.rename("com.google.common.**" -> "org.sparkproject.connect.client.guava.@1").inAll,
       ShadeRule.rename("com.google.thirdparty.**" -> "org.sparkproject.connect.client.guava.@1").inAll,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org