You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2023/12/14 08:06:47 UTC

(spark) branch branch-3.5 updated: [SPARK-46396][SQL] Timestamp inference should not throw exception

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 908c472728f2 [SPARK-46396][SQL] Timestamp inference should not throw exception
908c472728f2 is described below

commit 908c472728f24034baf0b59f03b04ca148eabeca
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Thu Dec 14 00:06:22 2023 -0800

    [SPARK-46396][SQL] Timestamp inference should not throw exception
    
    ### What changes were proposed in this pull request?
    
    When setting `spark.sql.legacy.timeParserPolicy=LEGACY`, Spark will use the LegacyFastTimestampFormatter to infer potential timestamp columns. The inference shouldn't throw exception.
    
    However, when the input is 23012150952, there is exception:
    
    ```
    
    For input string: "23012150952"
    
    java.lang.NumberFormatException: For input string: "23012150952"
    
    at java.base/java.lang.NumberFormatException.forInputString(NumberFormatException.java:67)
    
    at java.base/java.lang.Integer.parseInt(Integer.java:668)
    
    at java.base/java.lang.Integer.parseInt(Integer.java:786)
    
    at org.apache.commons.lang3.time.FastDateParser$NumberStrategy.parse(FastDateParser.java:304)
    
    at org.apache.commons.lang3.time.FastDateParser.parse(FastDateParser.java:1045)
    
    at org.apache.commons.lang3.time.FastDateFormat.parse(FastDateFormat.java:651)
    
    at org.apache.spark.sql.catalyst.util.LegacyFastTimestampFormatter.parseOptional(TimestampFormatter.scala:418)
    
    ```
    
    This PR is to fix the issue.
    
    ### Why are the changes needed?
    
    Bug fix, Timestamp inference should not throw exception
    ### Does this PR introduce _any_ user-facing change?
    
    NO
    
    ### How was this patch tested?
    
    New test case + existing tests
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #44338 from gengliangwang/fixParseOptional.
    
    Authored-by: Gengliang Wang <ge...@apache.org>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
    (cherry picked from commit 4a79ae9d821e9b04fbe949251050c3e4819dff92)
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 .../apache/spark/sql/catalyst/util/TimestampFormatter.scala  | 12 ++++++++----
 .../spark/sql/catalyst/util/TimestampFormatterSuite.scala    |  3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 55eee41c14ca..0866cee9334c 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -414,10 +414,14 @@ class LegacyFastTimestampFormatter(
 
   override def parseOptional(s: String): Option[Long] = {
     cal.clear() // Clear the calendar because it can be re-used many times
-    if (fastDateFormat.parse(s, new ParsePosition(0), cal)) {
-      Some(extractMicros(cal))
-    } else {
-      None
+    try {
+      if (fastDateFormat.parse(s, new ParsePosition(0), cal)) {
+        Some(extractMicros(cal))
+      } else {
+        None
+      }
+    } catch {
+      case NonFatal(_) => None
     }
   }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
index 2134a0d6ecd3..27d60815766d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
@@ -502,10 +502,11 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
 
     assert(fastFormatter.parseOptional("2023-12-31 23:59:59.9990").contains(1704067199999000L))
     assert(fastFormatter.parseOptional("abc").isEmpty)
+    assert(fastFormatter.parseOptional("23012150952").isEmpty)
 
     assert(simpleFormatter.parseOptional("2023-12-31 23:59:59.9990").contains(1704067208990000L))
     assert(simpleFormatter.parseOptional("abc").isEmpty)
-
+    assert(simpleFormatter.parseOptional("23012150952").isEmpty)
   }
 
   test("SPARK-45424: do not return optional parse results when only prefix match") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org