You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/09/27 00:47:09 UTC
spark git commit: [SPARK-25454][SQL] add a new config for picking
minimum precision for integral literals
Repository: spark
Updated Branches:
refs/heads/master 51540c2fa -> d0990e3df
[SPARK-25454][SQL] add a new config for picking minimum precision for integral literals
## What changes were proposed in this pull request?
https://github.com/apache/spark/pull/20023 proposed to allow precision loss during decimal operations, to reduce the possibility of overflow. This is a behavior change and is protected by the DECIMAL_OPERATIONS_ALLOW_PREC_LOSS config. However, that PR introduced another behavior change: picking a minimum precision for integral literals, which is not protected by a config. This PR adds a new config for it: `spark.sql.literal.pickMinimumPrecision`.
This can allow users to work around the issue in SPARK-25454, which is caused by a long-standing bug of negative scale.
## How was this patch tested?
a new test
Closes #22494 from cloud-fan/decimal.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: gatorsmile <ga...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d0990e3d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d0990e3d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d0990e3d
Branch: refs/heads/master
Commit: d0990e3dfee752a6460a6360e1a773138364d774
Parents: 51540c2
Author: Wenchen Fan <we...@databricks.com>
Authored: Wed Sep 26 17:47:05 2018 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Wed Sep 26 17:47:05 2018 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/analysis/DecimalPrecision.scala | 10 ++++++----
.../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++
.../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 7 +++++++
3 files changed, 24 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d0990e3d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
index e511f80..8269233 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
@@ -290,11 +290,13 @@ object DecimalPrecision extends TypeCoercionRule {
// potentially loosing 11 digits of the fractional part. Using only the precision needed
// by the Literal, instead, the result would be DECIMAL(38 + 1 + 1, 18), which would
// become DECIMAL(38, 16), safely having a much lower precision loss.
- case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType]
- && l.dataType.isInstanceOf[IntegralType] =>
+ case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] &&
+ l.dataType.isInstanceOf[IntegralType] &&
+ SQLConf.get.literalPickMinimumPrecision =>
b.makeCopy(Array(Cast(l, DecimalType.fromLiteral(l)), r))
- case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType]
- && r.dataType.isInstanceOf[IntegralType] =>
+ case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType] &&
+ r.dataType.isInstanceOf[IntegralType] &&
+ SQLConf.get.literalPickMinimumPrecision =>
b.makeCopy(Array(l, Cast(r, DecimalType.fromLiteral(r))))
// Promote integers inside a binary expression with fixed-precision decimals to decimals,
// and fixed-precision decimals in an expression with floats / doubles to doubles
http://git-wip-us.apache.org/repos/asf/spark/blob/d0990e3d/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 2f4d660..f6c9880 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1331,6 +1331,15 @@ object SQLConf {
.booleanConf
.createWithDefault(true)
+ val LITERAL_PICK_MINIMUM_PRECISION =
+ buildConf("spark.sql.legacy.literal.pickMinimumPrecision")
+ .internal()
+ .doc("When integral literal is used in decimal operations, pick a minimum precision " +
+ "required by the literal if this config is true, to make the resulting precision and/or " +
+ "scale smaller. This can reduce the possibility of precision loss and/or overflow.")
+ .booleanConf
+ .createWithDefault(true)
+
val SQL_OPTIONS_REDACTION_PATTERN =
buildConf("spark.sql.redaction.options.regex")
.doc("Regex to decide which keys in a Spark SQL command's options map contain sensitive " +
@@ -1925,6 +1934,8 @@ class SQLConf extends Serializable with Logging {
def decimalOperationsAllowPrecisionLoss: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_PREC_LOSS)
+ def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION)
+
def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE)
def continuousStreamingExecutorPollIntervalMs: Long =
http://git-wip-us.apache.org/repos/asf/spark/blob/d0990e3d/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 8fcebb3..631ab1b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2849,6 +2849,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
val result = ds.flatMap(_.bar).distinct
result.rdd.isEmpty
}
+
+ test("SPARK-25454: decimal division with negative scale") {
+ // TODO: completely fix this issue even when LITERAL_PICK_MINIMUM_PRECISION is true.
+ withSQLConf(SQLConf.LITERAL_PICK_MINIMUM_PRECISION.key -> "false") {
+ checkAnswer(sql("select 26393499451 / (1e6 * 1000)"), Row(BigDecimal("26.3934994510000")))
+ }
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org