You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/03/19 15:54:38 UTC

[spark] branch branch-3.0 updated: [SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 1a5cd16  [SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction
1a5cd16 is described below

commit 1a5cd167e0901948d68d6c7880d39966e74d10b3
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Fri Mar 20 00:52:09 2020 +0900

    [SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction
    
    ### What changes were proposed in this pull request?
    
    This is a followup of https://github.com/apache/spark/pull/26933
    
    A fractional string like "1.23" is definitely not a valid integral format, so the cast should fail under ANSI mode.
    
    ### Why are the changes needed?
    
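    To correct the ANSI cast behavior from string to integral types: previously a fractional string such as "1.23" was accepted and truncated to its integral part instead of failing.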
    
    ### Does this PR introduce any user-facing change?
    
    Yes, but only under ANSI mode, which is off by default.
    
    ### How was this patch tested?
    
    Added a new test case to AnsiCastSuite.
    
    Closes #27957 from cloud-fan/ansi.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
    (cherry picked from commit ac262cb27255f989f6a6dd864bd5114a928b96da)
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../org/apache/spark/unsafe/types/UTF8String.java  | 24 +++++++++++++---------
 .../spark/sql/catalyst/expressions/CastSuite.scala |  2 ++
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index c538466..186597f 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -1105,6 +1105,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
    * @return true if the parsing was successful else false
    */
   public boolean toLong(LongWrapper toLongResult) {
+    return toLong(toLongResult, true);
+  }
+
+  private boolean toLong(LongWrapper toLongResult, boolean allowDecimal) {
     int offset = 0;
     while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
     if (offset == this.numBytes) return false;
@@ -1129,7 +1133,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
     while (offset <= end) {
       b = getByte(offset);
       offset++;
-      if (b == separator) {
+      if (b == separator && allowDecimal) {
         // We allow decimals and will return a truncated integral in that case.
         // Therefore we won't throw an exception here (checking the fractional
         // part happens below.)
@@ -1198,6 +1202,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
    * @return true if the parsing was successful else false
    */
   public boolean toInt(IntWrapper intWrapper) {
+    return toInt(intWrapper, true);
+  }
+
+  private boolean toInt(IntWrapper intWrapper, boolean allowDecimal) {
     int offset = 0;
     while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
     if (offset == this.numBytes) return false;
@@ -1222,7 +1230,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
     while (offset <= end) {
       b = getByte(offset);
       offset++;
-      if (b == separator) {
+      if (b == separator && allowDecimal) {
         // We allow decimals and will return a truncated integral in that case.
         // Therefore we won't throw an exception here (checking the fractional
         // part happens below.)
@@ -1276,9 +1284,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
     if (toInt(intWrapper)) {
       int intValue = intWrapper.value;
       short result = (short) intValue;
-      if (result == intValue) {
-        return true;
-      }
+      return result == intValue;
     }
     return false;
   }
@@ -1287,9 +1293,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
     if (toInt(intWrapper)) {
       int intValue = intWrapper.value;
       byte result = (byte) intValue;
-      if (result == intValue) {
-        return true;
-      }
+      return result == intValue;
     }
     return false;
   }
@@ -1302,7 +1306,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
    */
   public long toLongExact() {
     LongWrapper result = new LongWrapper();
-    if (toLong(result)) {
+    if (toLong(result, false)) {
       return result.value;
     }
     throw new NumberFormatException("invalid input syntax for type numeric: " + this);
@@ -1316,7 +1320,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
    */
   public int toIntExact() {
     IntWrapper result = new IntWrapper();
-    if (toInt(result)) {
+    if (toInt(result, false)) {
       return result.value;
     }
     throw new NumberFormatException("invalid input syntax for type numeric: " + this);
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index bde95f0..9d20eac 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1287,6 +1287,8 @@ class AnsiCastSuite extends CastSuiteBase {
         cast("123-string", dataType), "invalid input")
       checkExceptionInExpression[NumberFormatException](
         cast("2020-07-19", dataType), "invalid input")
+      checkExceptionInExpression[NumberFormatException](
+        cast("1.23", dataType), "invalid input")
     }
 
     Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType =>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org