You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/03/19 15:54:38 UTC
[spark] branch branch-3.0 updated: [SPARK-30292][SQL][FOLLOWUP]
ansi cast from strings to integral numbers (byte/short/int/long) should
fail with fraction
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 1a5cd16 [SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction
1a5cd16 is described below
commit 1a5cd167e0901948d68d6c7880d39966e74d10b3
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Fri Mar 20 00:52:09 2020 +0900
[SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction
### What changes were proposed in this pull request?
This is a followup of https://github.com/apache/spark/pull/26933
A fractional string such as "1.23" is not a valid integral format, so casting it to an integral type (byte/short/int/long) should fail under ANSI mode instead of being silently truncated.
### Why are the changes needed?
To correct the ANSI cast behavior from strings to integral types.
### Does this PR introduce any user-facing change?
Yes, but only under ANSI mode, which is off by default: casting a fractional string to an integral type now throws a NumberFormatException.
### How was this patch tested?
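A new test case was added to AnsiCastSuite (in CastSuite.scala) that checks casting "1.23" to an integral type fails with an "invalid input" error.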
Closes #27957 from cloud-fan/ansi.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
(cherry picked from commit ac262cb27255f989f6a6dd864bd5114a928b96da)
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
.../org/apache/spark/unsafe/types/UTF8String.java | 24 +++++++++++++---------
.../spark/sql/catalyst/expressions/CastSuite.scala | 2 ++
2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index c538466..186597f 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -1105,6 +1105,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
* @return true if the parsing was successful else false
*/
public boolean toLong(LongWrapper toLongResult) {
+ return toLong(toLongResult, true);
+ }
+
+ private boolean toLong(LongWrapper toLongResult, boolean allowDecimal) {
int offset = 0;
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
if (offset == this.numBytes) return false;
@@ -1129,7 +1133,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (offset <= end) {
b = getByte(offset);
offset++;
- if (b == separator) {
+ if (b == separator && allowDecimal) {
// We allow decimals and will return a truncated integral in that case.
// Therefore we won't throw an exception here (checking the fractional
// part happens below.)
@@ -1198,6 +1202,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
* @return true if the parsing was successful else false
*/
public boolean toInt(IntWrapper intWrapper) {
+ return toInt(intWrapper, true);
+ }
+
+ private boolean toInt(IntWrapper intWrapper, boolean allowDecimal) {
int offset = 0;
while (offset < this.numBytes && getByte(offset) <= ' ') offset++;
if (offset == this.numBytes) return false;
@@ -1222,7 +1230,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (offset <= end) {
b = getByte(offset);
offset++;
- if (b == separator) {
+ if (b == separator && allowDecimal) {
// We allow decimals and will return a truncated integral in that case.
// Therefore we won't throw an exception here (checking the fractional
// part happens below.)
@@ -1276,9 +1284,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (toInt(intWrapper)) {
int intValue = intWrapper.value;
short result = (short) intValue;
- if (result == intValue) {
- return true;
- }
+ return result == intValue;
}
return false;
}
@@ -1287,9 +1293,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (toInt(intWrapper)) {
int intValue = intWrapper.value;
byte result = (byte) intValue;
- if (result == intValue) {
- return true;
- }
+ return result == intValue;
}
return false;
}
@@ -1302,7 +1306,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
*/
public long toLongExact() {
LongWrapper result = new LongWrapper();
- if (toLong(result)) {
+ if (toLong(result, false)) {
return result.value;
}
throw new NumberFormatException("invalid input syntax for type numeric: " + this);
@@ -1316,7 +1320,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
*/
public int toIntExact() {
IntWrapper result = new IntWrapper();
- if (toInt(result)) {
+ if (toInt(result, false)) {
return result.value;
}
throw new NumberFormatException("invalid input syntax for type numeric: " + this);
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index bde95f0..9d20eac 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1287,6 +1287,8 @@ class AnsiCastSuite extends CastSuiteBase {
cast("123-string", dataType), "invalid input")
checkExceptionInExpression[NumberFormatException](
cast("2020-07-19", dataType), "invalid input")
+ checkExceptionInExpression[NumberFormatException](
+ cast("1.23", dataType), "invalid input")
}
Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org