You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/10/06 10:08:51 UTC

[spark] branch branch-3.0 updated: [SPARK-36919][SQL] Make BadRecordException fields transient

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new d454d4e  [SPARK-36919][SQL] Make BadRecordException fields transient
d454d4e is described below

commit d454d4efac75823eb07809d913adb1db5ee440f7
Author: tianhanhu <ad...@gmail.com>
AuthorDate: Wed Oct 6 19:06:09 2021 +0900

    [SPARK-36919][SQL] Make BadRecordException fields transient
    
    ### What changes were proposed in this pull request?
    Migrating a Spark application from 2.4.x to 3.1.x and finding a difference in the exception chaining behavior. In a case of parsing a malformed CSV, where the root cause exception should be Caused by: java.lang.RuntimeException: Malformed CSV record, only the top level exception is kept, and all lower level exceptions and root cause are lost. Thus, when we call ExceptionUtils.getRootCause on the exception, we still get itself.
    The reason for the difference is that the RuntimeException is wrapped in BadRecordException, which has non-serializable fields. When we try to serialize the exception from tasks and deserialize it in the scheduler, the exception chain is lost.
    This PR marks the non-serializable fields of BadRecordException as transient, so the rest of the exception can be serialized and deserialized properly.
    
    ### Why are the changes needed?
    Make BadRecordException serializable
    
    ### Does this PR introduce _any_ user-facing change?
    Users can now obtain the root cause of a BadRecordException
    
    ### How was this patch tested?
    Unit testing
    
    Closes #34167 from tianhanhu/master.
    
    Authored-by: tianhanhu <ad...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit aed977c4682b6f378a26050ffab51b9b2075cae4)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala | 4 ++--
 .../org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala     | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala
index d719a33..67defe7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala
@@ -38,6 +38,6 @@ case class PartialResultException(
  * @param cause the actual exception about why the record is bad and can't be parsed.
  */
 case class BadRecordException(
-    record: () => UTF8String,
-    partialResult: () => Option[InternalRow],
+    @transient record: () => UTF8String,
+    @transient partialResult: () => Option[InternalRow],
     cause: Throwable) extends Exception(cause)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 3b564b6..ab6fe27 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -29,6 +29,7 @@ import scala.collection.JavaConverters._
 import scala.util.Properties
 
 import com.univocity.parsers.common.TextParsingException
+import org.apache.commons.lang3.exception.ExceptionUtils
 import org.apache.commons.lang3.time.FastDateFormat
 import org.apache.hadoop.io.SequenceFile.CompressionType
 import org.apache.hadoop.io.compress.GzipCodec
@@ -357,6 +358,7 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa
       }
 
       assert(exception.getMessage.contains("Malformed CSV record"))
+      assert(ExceptionUtils.getRootCause(exception).isInstanceOf[RuntimeException])
     }
   }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org