You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by "allisonwang-db (via GitHub)" <gi...@apache.org> on 2023/03/28 08:35:17 UTC

[GitHub] [spark] allisonwang-db commented on a diff in pull request #40575: [SPARK-42945][CONNECT] Support PYSPARK_JVM_STACKTRACE_ENABLED in Spark Connect

allisonwang-db commented on code in PR #40575:
URL: https://github.com/apache/spark/pull/40575#discussion_r1150223876


##########
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala:
##########
@@ -109,23 +118,41 @@ class SparkConnectService(debug: Boolean)
    */
   private def handleError[V](
       opType: String,
-      observer: StreamObserver[V]): PartialFunction[Throwable, Unit] = {
-    case se: SparkException if isPythonExecutionException(se) =>
-      logError(s"Error during: $opType", se)
-      observer.onError(
-        StatusProto.toStatusRuntimeException(buildStatusFromThrowable(se.getCause)))
-
-    case e: Throwable if e.isInstanceOf[SparkThrowable] || NonFatal.apply(e) =>
-      logError(s"Error during: $opType", e)
-      observer.onError(StatusProto.toStatusRuntimeException(buildStatusFromThrowable(e)))
-
-    case e: Throwable =>
-      logError(s"Error during: $opType", e)
-      observer.onError(
-        Status.UNKNOWN
-          .withCause(e)
-          .withDescription(StringUtils.abbreviate(e.getMessage, 2048))
-          .asRuntimeException())
+      observer: StreamObserver[V],
+      userId: String,
+      sessionId: String): PartialFunction[Throwable, Unit] = {
+    val session =
+      SparkConnectService
+        .getOrCreateIsolatedSession(userId, sessionId)
+        .session
+    val stackTraceEnabled = try {
+      session.conf.get(
+        org.apache.spark.sql.internal.SQLConf.PYSPARK_JVM_STACKTRACE_ENABLED.key).toBoolean
+    } catch {
+      case NonFatal(_) => true
+    }
+
+    {
+      case se: SparkException if isPythonExecutionException(se) =>
+        logError(s"Error during: $opType", se)

Review Comment:
   @HyukjinKwon @grundprinzip Another thing that could improve debuggability is including the `user_id` and `session_id` in this error log. What do you think?



##########
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala:
##########
@@ -109,23 +118,41 @@ class SparkConnectService(debug: Boolean)
    */
   private def handleError[V](
       opType: String,
-      observer: StreamObserver[V]): PartialFunction[Throwable, Unit] = {
-    case se: SparkException if isPythonExecutionException(se) =>
-      logError(s"Error during: $opType", se)
-      observer.onError(
-        StatusProto.toStatusRuntimeException(buildStatusFromThrowable(se.getCause)))
-
-    case e: Throwable if e.isInstanceOf[SparkThrowable] || NonFatal.apply(e) =>
-      logError(s"Error during: $opType", e)
-      observer.onError(StatusProto.toStatusRuntimeException(buildStatusFromThrowable(e)))
-
-    case e: Throwable =>
-      logError(s"Error during: $opType", e)
-      observer.onError(
-        Status.UNKNOWN
-          .withCause(e)
-          .withDescription(StringUtils.abbreviate(e.getMessage, 2048))
-          .asRuntimeException())
+      observer: StreamObserver[V],
+      userId: String,
+      sessionId: String): PartialFunction[Throwable, Unit] = {
+    val session =
+      SparkConnectService
+        .getOrCreateIsolatedSession(userId, sessionId)
+        .session
+    val stackTraceEnabled = try {
+      session.conf.get(
+        org.apache.spark.sql.internal.SQLConf.PYSPARK_JVM_STACKTRACE_ENABLED.key).toBoolean
+    } catch {
+      case NonFatal(_) => true
+    }
+
+    {
+      case se: SparkException if isPythonExecutionException(se) =>
+        logError(s"Error during: $opType", se)
+        observer.onError(
+          StatusProto.toStatusRuntimeException(
+            buildStatusFromThrowable(se.getCause, stackTraceEnabled)))
+
+      case e: Throwable if e.isInstanceOf[SparkThrowable] || NonFatal.apply(e) =>
+        logError(s"Error during: $opType", e)
+        observer.onError(
+          StatusProto.toStatusRuntimeException(
+            buildStatusFromThrowable(e, stackTraceEnabled)))
+
+      case e: Throwable =>
+        logError(s"Error during: $opType", e)
+        observer.onError(
+          Status.UNKNOWN
+            .withCause(e)
+            .withDescription(StringUtils.abbreviate(e.getMessage, 2048))

Review Comment:
   Do we want to display the stack trace for this unknown error? What are some examples of this error?



##########
connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala:
##########
@@ -143,7 +170,13 @@ class SparkConnectService(debug: Boolean)
       responseObserver: StreamObserver[proto.ExecutePlanResponse]): Unit = {
     try {
       new SparkConnectStreamHandler(responseObserver).handle(request)
-    } catch handleError("execute", observer = responseObserver)
+    } catch {
+      handleError(
+        "execute",
+        observer = responseObserver,
+        userId = request.getUserContext.getUserId,
+        sessionId = request.getSessionId)

Review Comment:
   Is it possible for an error to be thrown while getting the UserContext and SessionId here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org