You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/06/11 21:16:56 UTC
[spark] branch master updated: [SPARK-21117][SQL] Built-in SQL
Function Support - WIDTH_BUCKET
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b1adc3d [SPARK-21117][SQL] Built-in SQL Function Support - WIDTH_BUCKET
b1adc3d is described below
commit b1adc3deee00058cba669534aee156dc7af243dc
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Thu Jun 11 14:15:28 2020 -0700
[SPARK-21117][SQL] Built-in SQL Function Support - WIDTH_BUCKET
### What changes were proposed in this pull request?
This PR intends to add a build-in SQL function - `WIDTH_BUCKET`.
It is the rework of #18323.
Closes #18323
The other RDBMS references for `WIDTH_BUCKET`:
- Oracle: https://docs.oracle.com/cd/B28359_01/olap.111/b28126/dml_functions_2137.htm#OLADM717
- PostgreSQL: https://www.postgresql.org/docs/current/functions-math.html
- Snowflake: https://docs.snowflake.com/en/sql-reference/functions/width_bucket.html
- Prestodb: https://prestodb.io/docs/current/functions/math.html
- Teradata: https://docs.teradata.com/reader/kmuOwjp1zEYg98JsB8fu_A/Wa8vw69cGzoRyNULHZeudg
- DB2: https://www.ibm.com/support/producthub/db2/docs/content/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0061483.html?pos=2
### Why are the changes needed?
For better usability.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added unit tests.
Closes #28764 from maropu/SPARK-21117.
Lead-authored-by: Takeshi Yamamuro <ya...@apache.org>
Co-authored-by: Yuming Wang <wg...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../sql/catalyst/analysis/FunctionRegistry.scala | 1 +
.../sql/catalyst/expressions/mathExpressions.scala | 96 ++++++++++++
.../sql-functions/sql-expression-schema.md | 3 +-
.../test/resources/sql-tests/inputs/operators.sql | 14 ++
.../sql-tests/inputs/postgreSQL/numeric.sql | 76 +++++----
.../resources/sql-tests/results/operators.sql.out | 98 +++++++++++-
.../sql-tests/results/postgreSQL/numeric.sql.out | 173 ++++++++++++++++++++-
7 files changed, 431 insertions(+), 30 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index e2559d4..3989df5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -274,6 +274,7 @@ object FunctionRegistry {
expression[Tan]("tan"),
expression[Cot]("cot"),
expression[Tanh]("tanh"),
+ expression[WidthBucket]("width_bucket"),
expression[Add]("+"),
expression[Subtract]("-"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index fe8ea2a..5c76495 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1325,3 +1325,99 @@ case class BRound(child: Expression, scale: Expression)
with Serializable with ImplicitCastInputTypes {
def this(child: Expression) = this(child, Literal(0))
}
+
+object WidthBucket {
+
+ def computeBucketNumber(value: Double, min: Double, max: Double, numBucket: Long): jl.Long = {
+ if (numBucket <= 0 || numBucket == Long.MaxValue || jl.Double.isNaN(value) || min == max ||
+ jl.Double.isNaN(min) || jl.Double.isInfinite(min) ||
+ jl.Double.isNaN(max) || jl.Double.isInfinite(max)) {
+ return null
+ }
+
+ val lower = Math.min(min, max)
+ val upper = Math.max(min, max)
+
+ if (min < max) {
+ if (value < lower) {
+ 0L
+ } else if (value >= upper) {
+ numBucket + 1L
+ } else {
+ (numBucket.toDouble * (value - lower) / (upper - lower)).toLong + 1L
+ }
+ } else { // `min > max` case
+ if (value > upper) {
+ 0L
+ } else if (value <= lower) {
+ numBucket + 1L
+ } else {
+ (numBucket.toDouble * (upper - value) / (upper - lower)).toLong + 1L
+ }
+ }
+ }
+}
+
+/**
+ * Returns the bucket number into which the value of this expression would fall
+ * after being evaluated. Note that input arguments must follow conditions listed below;
+ * otherwise, the method will return null.
+ * - `numBucket` must be greater than zero and be less than Long.MaxValue
+ * - `value`, `min`, and `max` cannot be NaN
+ * - `min` bound cannot equal `max`
+ * - `min` and `max` must be finite
+ *
+ * Note: If `minValue` > `maxValue`, a return value is as follows;
+ * if `value` > `minValue`, it returns 0.
+ * if `value` <= `maxValue`, it returns `numBucket` + 1.
+ * otherwise, it returns (`numBucket` * (`minValue` - `value`) / (`minValue` - `maxValue`)) + 1
+ *
+ * @param value is the expression to compute a bucket number in the histogram
+ * @param minValue is the minimum value of the histogram
+ * @param maxValue is the maximum value of the histogram
+ * @param numBucket is the number of buckets
+ */
+@ExpressionDescription(
+ usage = """
+ _FUNC_(value, min_value, max_value, num_bucket) - Returns the bucket number to which
+ `value` would be assigned in an equiwidth histogram with `num_bucket` buckets,
+ in the range `min_value` to `max_value`."
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(5.3, 0.2, 10.6, 5);
+ 3
+ > SELECT _FUNC_(-2.1, 1.3, 3.4, 3);
+ 0
+ > SELECT _FUNC_(8.1, 0.0, 5.7, 4);
+ 5
+ > SELECT _FUNC_(-0.9, 5.2, 0.5, 2);
+ 3
+ """,
+ since = "3.1.0")
+case class WidthBucket(
+ value: Expression,
+ minValue: Expression,
+ maxValue: Expression,
+ numBucket: Expression)
+ extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant {
+
+ override def children: Seq[Expression] = Seq(value, minValue, maxValue, numBucket)
+ override def inputTypes: Seq[AbstractDataType] = Seq(DoubleType, DoubleType, DoubleType, LongType)
+ override def dataType: DataType = LongType
+ override def nullable: Boolean = true
+
+ override protected def nullSafeEval(input: Any, min: Any, max: Any, numBucket: Any): Any = {
+ WidthBucket.computeBucketNumber(
+ input.asInstanceOf[Double],
+ min.asInstanceOf[Double],
+ max.asInstanceOf[Double],
+ numBucket.asInstanceOf[Long])
+ }
+
+ override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+ defineCodeGen(ctx, ev, (input, min, max, numBucket) =>
+ "org.apache.spark.sql.catalyst.expressions.WidthBucket" +
+ s".computeBucketNumber($input, $min, $max, $numBucket)")
+ }
+}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index d245aa5..1a9d5bb 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -1,6 +1,6 @@
<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- - Number of queries: 337
+ - Number of queries: 338
- Number of expressions that missing example: 34
- Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch
## Schema of Built-in Functions
@@ -291,6 +291,7 @@
| org.apache.spark.sql.catalyst.expressions.Uuid | uuid | SELECT uuid() | struct<uuid():string> |
| org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct<weekday(CAST(2009-07-30 AS DATE)):int> |
| org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct<weekofyear(CAST(2008-02-20 AS DATE)):int> |
+| org.apache.spark.sql.catalyst.expressions.WidthBucket | width_bucket | SELECT width_bucket(5.3, 0.2, 10.6, 5) | struct<widthbucket(CAST(5.3 AS DOUBLE), CAST(0.2 AS DOUBLE), CAST(10.6 AS DOUBLE), CAST(5 AS BIGINT)):bigint> |
| org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct<xxhash64(Spark, array(123), 2):bigint> |
| org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct<year(CAST(2016-07-30 AS DATE)):int> |
| org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) | struct<zip_with(array(1, 2, 3), array(a, b, c), lambdafunction(named_struct(y, namedlambdavariable(), x, namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array<struct<y:string,x:int>>> |
diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
index 20bf0eb..c296fa5 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
@@ -81,3 +81,17 @@ select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11);
-- pmod
select pmod(-7, 2), pmod(0, 2), pmod(7, 0), pmod(7, null), pmod(null, 2), pmod(null, null);
select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as smallint), cast(0 as smallint));
+
+-- width_bucket
+select width_bucket(5.35, 0.024, 10.06, 5);
+select width_bucket(5.35, 0.024, 10.06, 3 + 2);
+select width_bucket('5.35', '0.024', '10.06', '5');
+select width_bucket(5.35, 0.024, 10.06, 2.5);
+select width_bucket(5.35, 0.024, 10.06, 0.5);
+select width_bucket(null, 0.024, 10.06, 5);
+select width_bucket(5.35, null, 10.06, 5);
+select width_bucket(5.35, 0.024, null, -5);
+select width_bucket(5.35, 0.024, 10.06, null);
+select width_bucket(5.35, 0.024, 10.06, -5);
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L); -- long max value
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L - 1);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql
index dbdb2ca..53f2aa4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql
@@ -754,22 +754,22 @@ DROP TABLE ceil_floor_round;
-- round((2.5 * 10 ^ i)::numeric, -i)
-- FROM generate_series(-5,5) AS t(i);
--- [SPARK-21117] Built-in SQL Function Support - WIDTH_BUCKET
-- Testing for width_bucket(). For convenience, we test both the
-- numeric and float8 versions of the function in this file.
-- errors
--- SELECT width_bucket(5.0, 3.0, 4.0, 0);
--- SELECT width_bucket(5.0, 3.0, 4.0, -5);
--- SELECT width_bucket(3.5, 3.0, 3.0, 888);
--- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0);
--- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5);
--- SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888);
--- SELECT width_bucket('NaN', 3.0, 4.0, 888);
--- SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888);
+SELECT width_bucket(5.0, 3.0, 4.0, 0);
+SELECT width_bucket(5.0, 3.0, 4.0, -5);
+SELECT width_bucket(3.5, 3.0, 3.0, 888);
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), 0);
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), -5);
+SELECT width_bucket(double(3.5), double(3.0), double(3.0), 888);
+SELECT width_bucket('NaN', 3.0, 4.0, 888);
+SELECT width_bucket(double(0), 'NaN', double(4.0), 888);
-- normal operation
-- CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8);
+CREATE TABLE width_bucket_test (operand_num decimal(30,15), operand_f8 double) USING parquet;
-- COPY width_bucket_test (operand_num) FROM stdin;
-- -5.2
@@ -795,28 +795,50 @@ DROP TABLE ceil_floor_round;
-- UPDATE width_bucket_test SET operand_f8 = operand_num::float8;
--- SELECT
--- operand_num,
--- width_bucket(operand_num, 0, 10, 5) AS wb_1,
--- width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
--- width_bucket(operand_num, 10, 0, 5) AS wb_2,
--- width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
--- width_bucket(operand_num, 2, 8, 4) AS wb_3,
--- width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
--- width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
--- width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
--- width_bucket(operand_num, -25, 25, 10) AS wb_5,
--- width_bucket(operand_f8, -25, 25, 10) AS wb_5f
--- FROM width_bucket_test;
+INSERT INTO width_bucket_test VALUES
+ (-5.2, -5.2),
+ (-0.0000000001, -0.0000000001),
+ (0.000000000001, 0.000000000001),
+ (1, 1),
+ (1.99999999999999, 1.99999999999999),
+ (2, 2),
+ (2.00000000000001, 2.00000000000001),
+ (3, 3),
+ (4, 4),
+ (4.5, 4.5),
+ (5, 5),
+ (5.5, 5.5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (9.99999999999999, 9.99999999999999),
+ (10, 10),
+ (10.0000000000001, 10.0000000000001);
+
+SELECT
+ operand_num,
+ width_bucket(operand_num, 0, 10, 5) AS wb_1,
+ width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
+ width_bucket(operand_num, 10, 0, 5) AS wb_2,
+ width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
+ width_bucket(operand_num, 2, 8, 4) AS wb_3,
+ width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
+ width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
+ width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
+ width_bucket(operand_num, -25, 25, 10) AS wb_5,
+ width_bucket(operand_f8, -25, 25, 10) AS wb_5f
+ FROM width_bucket_test
+ ORDER BY operand_num ASC;
-- for float8 only, check positive and negative infinity: we require
-- finite bucket bounds, but allow an infinite operand
--- SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error
--- SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error
--- SELECT width_bucket('Infinity'::float8, 1, 10, 10),
--- width_bucket('-Infinity'::float8, 1, 10, 10);
+SELECT width_bucket(double(0.0), double('Infinity'), 5, 10); -- error
+SELECT width_bucket(double(0.0), 5, double('-Infinity'), 20); -- error
+SELECT width_bucket(double('Infinity'), 1, 10, 10),
+ width_bucket(double('-Infinity'), 1, 10, 10);
--- DROP TABLE width_bucket_test;
+DROP TABLE width_bucket_test;
-- [SPARK-28137] Missing Data Type Formatting Functions: TO_CHAR
-- TO_CHAR()
diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
index 9accc57..fc58bc7 100644
--- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 57
+-- Number of queries: 69
-- !query
@@ -456,3 +456,99 @@ select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as smallint)
struct<pmod(CAST(3.13 AS DECIMAL(10,0)), CAST(0 AS DECIMAL(10,0))):decimal(10,0),pmod(CAST(2 AS SMALLINT), CAST(0 AS SMALLINT)):smallint>
-- !query output
NULL NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+3
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 3 + 2)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST((3 + 2) AS BIGINT)):bigint>
+-- !query output
+3
+
+
+-- !query
+select width_bucket('5.35', '0.024', '10.06', '5')
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+3
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 2.5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(2.5 AS BIGINT)):bigint>
+-- !query output
+2
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 0.5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(0.5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(null, 0.024, 10.06, 5)
+-- !query schema
+struct<widthbucket(CAST(NULL AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, null, 10.06, 5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, null, -5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, null)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(NULL AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, -5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), 9223372036854775807):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L - 1)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), (9223372036854775807 - CAST(1 AS BIGINT))):bigint>
+-- !query output
+4894746858139549697
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
index 7b7aeb4..3cade4f 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 577
+-- Number of queries: 592
-- !query
@@ -4424,6 +4424,177 @@ struct<>
-- !query
+SELECT width_bucket(5.0, 3.0, 4.0, 0)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(0 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(5.0, 3.0, 4.0, -5)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(3.5, 3.0, 3.0, 888)
+-- !query schema
+struct<widthbucket(CAST(3.5 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), 0)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(0 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), -5)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(3.5), double(3.0), double(3.0), 888)
+-- !query schema
+struct<widthbucket(CAST(3.5 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket('NaN', 3.0, 4.0, 888)
+-- !query schema
+struct<widthbucket(CAST(NaN AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(0), 'NaN', double(4.0), 888)
+-- !query schema
+struct<widthbucket(CAST(0 AS DOUBLE), CAST(NaN AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+CREATE TABLE width_bucket_test (operand_num decimal(30,15), operand_f8 double) USING parquet
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+INSERT INTO width_bucket_test VALUES
+ (-5.2, -5.2),
+ (-0.0000000001, -0.0000000001),
+ (0.000000000001, 0.000000000001),
+ (1, 1),
+ (1.99999999999999, 1.99999999999999),
+ (2, 2),
+ (2.00000000000001, 2.00000000000001),
+ (3, 3),
+ (4, 4),
+ (4.5, 4.5),
+ (5, 5),
+ (5.5, 5.5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (9.99999999999999, 9.99999999999999),
+ (10, 10),
+ (10.0000000000001, 10.0000000000001)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT
+ operand_num,
+ width_bucket(operand_num, 0, 10, 5) AS wb_1,
+ width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
+ width_bucket(operand_num, 10, 0, 5) AS wb_2,
+ width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
+ width_bucket(operand_num, 2, 8, 4) AS wb_3,
+ width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
+ width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
+ width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
+ width_bucket(operand_num, -25, 25, 10) AS wb_5,
+ width_bucket(operand_f8, -25, 25, 10) AS wb_5f
+ FROM width_bucket_test
+ ORDER BY operand_num ASC
+-- !query schema
+struct<operand_num:decimal(30,15),wb_1:bigint,wb_1f:bigint,wb_2:bigint,wb_2f:bigint,wb_3:bigint,wb_3f:bigint,wb_4:bigint,wb_4f:bigint,wb_5:bigint,wb_5f:bigint>
+-- !query output
+-5.200000000000000 0 0 6 6 0 0 0 0 4 4
+-0.000000000100000 0 0 6 6 0 0 0 0 5 5
+0.000000000001000 1 1 5 5 0 0 0 0 6 6
+1.000000000000000 1 1 5 5 0 0 0 0 6 6
+1.999999999999990 1 1 5 5 0 0 0 0 6 6
+2.000000000000000 2 2 5 5 1 1 0 0 6 6
+2.000000000000010 2 2 4 4 1 1 0 0 6 6
+3.000000000000000 2 2 4 4 1 1 0 0 6 6
+4.000000000000000 3 3 4 4 2 2 0 0 6 6
+4.500000000000000 3 3 3 3 2 2 0 0 6 6
+5.000000000000000 3 3 3 3 3 3 1 1 7 7
+5.500000000000000 3 3 3 3 3 3 21 21 7 7
+6.000000000000000 4 4 3 3 3 3 21 21 7 7
+7.000000000000000 4 4 2 2 4 4 21 21 7 7
+8.000000000000000 5 5 2 2 5 5 21 21 7 7
+9.000000000000000 5 5 1 1 5 5 21 21 7 7
+9.999999999999990 5 5 1 1 5 5 21 21 7 7
+10.000000000000000 6 6 1 1 5 5 21 21 8 8
+10.000000000000100 6 6 0 0 5 5 21 21 8 8
+
+
+-- !query
+SELECT width_bucket(double(0.0), double('Infinity'), 5, 10)
+-- !query schema
+struct<widthbucket(CAST(0.0 AS DOUBLE), CAST(Infinity AS DOUBLE), CAST(5 AS DOUBLE), CAST(10 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(0.0), 5, double('-Infinity'), 20)
+-- !query schema
+struct<widthbucket(CAST(0.0 AS DOUBLE), CAST(5 AS DOUBLE), CAST(-Infinity AS DOUBLE), CAST(20 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double('Infinity'), 1, 10, 10),
+ width_bucket(double('-Infinity'), 1, 10, 10)
+-- !query schema
+struct<widthbucket(CAST(Infinity AS DOUBLE), CAST(1 AS DOUBLE), CAST(10 AS DOUBLE), CAST(10 AS BIGINT)):bigint,widthbucket(CAST(-Infinity AS DOUBLE), CAST(1 AS DOUBLE), CAST(10 AS DOUBLE), CAST(10 AS BIGINT)):bigint>
+-- !query output
+11 0
+
+
+-- !query
+DROP TABLE width_bucket_test
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
CREATE TABLE num_input_test (n1 decimal(38, 18)) USING parquet
-- !query schema
struct<>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org