You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "chong (Jira)" <ji...@apache.org> on 2022/05/24 10:23:00 UTC
[jira] [Updated] (SPARK-39209) Error occurs when cast a big enough long/float to timestamp
[ https://issues.apache.org/jira/browse/SPARK-39209?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
chong updated SPARK-39209:
--------------------------
Summary: Error occurs when cast a big enough long/float to timestamp (was: Error occurs when cast a big enough long to timestamp)
> Error occurs when cast a big enough long/float to timestamp
> ------------------------------------------------------------
>
> Key: SPARK-39209
> URL: https://issues.apache.org/jira/browse/SPARK-39209
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.3.0
> Environment: Spark 3.3.0
> Reporter: chong
> Priority: Major
>
>
> Got an error when casting a big enough long to a timestamp; according to the following code in `Cast.scala`, it should instead return the max timestamp:
>
> {code:java}
> private[this] def longToTimestamp(t: Long): Long = SECONDS.toMicros(t)
> // the logic of SECONDS.toMicros is:
> static long x(long d, long m, long over) {
> if (d > Long.MAX_VALUE / 1000000L) return Long.MAX_VALUE;
> if (d < -(Long.MAX_VALUE / 1000000L)) return Long.MIN_VALUE;
> return d * m;
> }{code}
>
>
> Reproduce steps:
> {code:java}
> $SPARK_HOME/bin/spark-shell
> import spark.implicits._
> val df = Seq((Long.MaxValue / 1000000) + 1).toDF("a")
> df.selectExpr("cast(a as timestamp)").collect()
> // the result is right
> Array[org.apache.spark.sql.Row] = Array([294247-01-10 12:00:54.775807])
>
> import org.apache.spark.sql.types._
> import org.apache.spark.sql.Row
> val schema = StructType(Array(StructField("a", LongType)))
> val data = Seq(Row((Long.MaxValue / 1000000) + 1))
> val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
> df.selectExpr("cast(a as timestamp)").collect()
>
> // error occurs:
> java.lang.RuntimeException: Error while decoding: java.lang.ArithmeticException: long overflow
> createexternalrow(staticinvoke(class org.apache.spark.sql.catalyst.util.DateTimeUtils$, ObjectType(class java.sql.Timestamp), toJavaTimestamp, input[0, timestamp, true], true, false), StructField(a,TimestampType,true))
> at org.apache.spark.sql.errors.QueryExecutionErrors$.expressionDecodingError(QueryExecutionErrors.scala:1047)
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:184)
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:172)
> at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
> at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
> at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
> at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
> at scala.collection.TraversableLike.map(TraversableLike.scala:286)
> at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
> at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
> at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3715)
> at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:2971)
> at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3706)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3704)
> at org.apache.spark.sql.Dataset.collect(Dataset.scala:2971)
> ... 51 elided
> Caused by: java.lang.ArithmeticException: long overflow
> at java.lang.Math.multiplyExact(Math.java:892)
> at org.apache.spark.sql.catalyst.util.DateTimeUtils$.millisToMicros(DateTimeUtils.scala:213)
> at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:362)
> at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:386)
> at org.apache.spark.sql.catalyst.util.DateTimeUtils$.toJavaTimestamp(DateTimeUtils.scala:146)
> at org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaTimestamp(DateTimeUtils.scala)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:181)
> ... 69 more
> {code}
>
> A similar issue occurs when casting a float/double to a timestamp.
> According to the following code, the cast should not throw an overflow error in non-ANSI mode:
> {code:java}
> if (d.isNaN || d.isInfinite) null else (d * MICROS_PER_SECOND).toLong{code}
>
> Reproduce steps:
> {code:java}
> import org.apache.spark.sql.types._
> import org.apache.spark.sql.Row
> val data = Seq(
> Row((Long.MaxValue / 1000000 + 100).toDouble),
> Row((-(Long.MaxValue / 1000000) - 100).toDouble))
> val schema = StructType(Array(StructField("a", DoubleType)))
> val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
> df.selectExpr("cast(a as timestamp)").collect()
> // Error
> java.lang.RuntimeException: Error while decoding: java.lang.ArithmeticException: long overflow
> createexternalrow(staticinvoke(class org.apache.spark.sql.catalyst.util.DateTimeUtils$, ObjectType(class java.sql.Timestamp), toJavaTimestamp, input[0, timestamp, true], true, false), StructField(a,TimestampType,true))
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:186)
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:173)
> at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
> at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
> at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
> at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
> at scala.collection.TraversableLike.map(TraversableLike.scala:238)
> at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
> at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
> at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3696)
> at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:2965)
> at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
> at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
> at org.apache.spark.sql.Dataset.collect(Dataset.scala:2965)
> ... 51 elided
> Caused by: java.lang.ArithmeticException: long overflow
> at java.lang.Math.multiplyExact(Math.java:892)
> at org.apache.spark.sql.catalyst.util.DateTimeUtils$.millisToMicros(DateTimeUtils.scala:202)
> at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:361)
> at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:385)
> at org.apache.spark.sql.catalyst.util.DateTimeUtils$.toJavaTimestamp(DateTimeUtils.scala:135)
> at org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaTimestamp(DateTimeUtils.scala)
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:182)
> ... 69 more
> // However, when calling show() instead of collect(), the result is OK.
> df.selectExpr("cast(a as timestamp)").show(false)
> +-----------------------------+
> |a |
> +-----------------------------+
> |+294247-01-10 12:00:54.775807|
> |-290308-12-22 04:04:48.224192|
> +-----------------------------+
> {code}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org