Posted to commits@spark.apache.org by do...@apache.org on 2020/06/11 21:16:56 UTC

[spark] branch master updated: [SPARK-21117][SQL] Built-in SQL Function Support - WIDTH_BUCKET

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b1adc3d  [SPARK-21117][SQL] Built-in SQL Function Support - WIDTH_BUCKET
b1adc3d is described below

commit b1adc3deee00058cba669534aee156dc7af243dc
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Thu Jun 11 14:15:28 2020 -0700

    [SPARK-21117][SQL] Built-in SQL Function Support - WIDTH_BUCKET
    
    ### What changes were proposed in this pull request?
    
    This PR intends to add a built-in SQL function - `WIDTH_BUCKET`.
    It is a rework of #18323.
    
    Closes #18323
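    
    For illustration, a quick usage sketch (illustrative only; assumes a local build that
    includes this patch - the queries and expected results mirror the examples and tests
    added in this PR):
    
    ```scala
    import org.apache.spark.sql.SparkSession
    
    val spark = SparkSession.builder()
      .appName("width-bucket-demo")
      .master("local[*]")
      .getOrCreate()
    
    // Bucket number of 5.3 in 5 equal-width buckets over the range [0.2, 10.6).
    spark.sql("SELECT width_bucket(5.3, 0.2, 10.6, 5)").show()       // 3
    // Values below the range map to bucket 0; values above map to num_bucket + 1.
    spark.sql("SELECT width_bucket(-2.1, 1.3, 3.4, 3)").show()       // 0
    spark.sql("SELECT width_bucket(8.1, 0.0, 5.7, 4)").show()        // 5
    // A non-positive bucket count yields NULL.
    spark.sql("SELECT width_bucket(5.35, 0.024, 10.06, -5)").show()  // NULL
    ```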
    
    References for `WIDTH_BUCKET` in other RDBMSs:
     - Oracle: https://docs.oracle.com/cd/B28359_01/olap.111/b28126/dml_functions_2137.htm#OLADM717
     - PostgreSQL: https://www.postgresql.org/docs/current/functions-math.html
     - Snowflake: https://docs.snowflake.com/en/sql-reference/functions/width_bucket.html
     - Prestodb: https://prestodb.io/docs/current/functions/math.html
     - Teradata: https://docs.teradata.com/reader/kmuOwjp1zEYg98JsB8fu_A/Wa8vw69cGzoRyNULHZeudg
     - DB2: https://www.ibm.com/support/producthub/db2/docs/content/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0061483.html?pos=2
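    
    The bucket-number semantics match those references. A minimal standalone Scala sketch of
    the computation this PR adds (simplified for illustration; see `mathExpressions.scala` in
    the diff for the actual implementation):
    
    ```scala
    object WidthBucketSketch {
      // Returns None where the SQL function returns NULL: a non-positive or
      // Long.MaxValue bucket count, a NaN value, or equal/non-finite bounds.
      def bucket(value: Double, min: Double, max: Double, numBucket: Long): Option[Long] = {
        if (numBucket <= 0 || numBucket == Long.MaxValue ||
            java.lang.Double.isNaN(value) || min == max ||
            java.lang.Double.isNaN(min) || java.lang.Double.isInfinite(min) ||
            java.lang.Double.isNaN(max) || java.lang.Double.isInfinite(max)) {
          None
        } else {
          val lower = math.min(min, max)
          val upper = math.max(min, max)
          val bucketNo = if (min < max) {
            if (value < lower) 0L
            else if (value >= upper) numBucket + 1L
            else (numBucket.toDouble * (value - lower) / (upper - lower)).toLong + 1L
          } else { // min > max: buckets are numbered from the upper bound downwards
            if (value > upper) 0L
            else if (value <= lower) numBucket + 1L
            else (numBucket.toDouble * (upper - value) / (upper - lower)).toLong + 1L
          }
          Some(bucketNo)
        }
      }
    }
    
    // WidthBucketSketch.bucket(5.3, 0.2, 10.6, 5)   // Some(3)
    // WidthBucketSketch.bucket(8.1, 0.0, 5.7, 4)    // Some(5): above the range
    // WidthBucketSketch.bucket(5.0, 3.0, 4.0, 0)    // None: invalid bucket count
    ```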
    
    ### Why are the changes needed?
    
    For better usability.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added unit tests.
    
    Closes #28764 from maropu/SPARK-21117.
    
    Lead-authored-by: Takeshi Yamamuro <ya...@apache.org>
    Co-authored-by: Yuming Wang <wg...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../sql/catalyst/analysis/FunctionRegistry.scala   |   1 +
 .../sql/catalyst/expressions/mathExpressions.scala |  96 ++++++++++++
 .../sql-functions/sql-expression-schema.md         |   3 +-
 .../test/resources/sql-tests/inputs/operators.sql  |  14 ++
 .../sql-tests/inputs/postgreSQL/numeric.sql        |  76 +++++----
 .../resources/sql-tests/results/operators.sql.out  |  98 +++++++++++-
 .../sql-tests/results/postgreSQL/numeric.sql.out   | 173 ++++++++++++++++++++-
 7 files changed, 431 insertions(+), 30 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index e2559d4..3989df5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -274,6 +274,7 @@ object FunctionRegistry {
     expression[Tan]("tan"),
     expression[Cot]("cot"),
     expression[Tanh]("tanh"),
+    expression[WidthBucket]("width_bucket"),
 
     expression[Add]("+"),
     expression[Subtract]("-"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index fe8ea2a..5c76495 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1325,3 +1325,99 @@ case class BRound(child: Expression, scale: Expression)
     with Serializable with ImplicitCastInputTypes {
   def this(child: Expression) = this(child, Literal(0))
 }
+
+object WidthBucket {
+
+  def computeBucketNumber(value: Double, min: Double, max: Double, numBucket: Long): jl.Long = {
+    if (numBucket <= 0 || numBucket == Long.MaxValue || jl.Double.isNaN(value) || min == max ||
+        jl.Double.isNaN(min) || jl.Double.isInfinite(min) ||
+        jl.Double.isNaN(max) || jl.Double.isInfinite(max)) {
+      return null
+    }
+
+    val lower = Math.min(min, max)
+    val upper = Math.max(min, max)
+
+    if (min < max) {
+      if (value < lower) {
+        0L
+      } else if (value >= upper) {
+        numBucket + 1L
+      } else {
+        (numBucket.toDouble * (value - lower) / (upper - lower)).toLong + 1L
+      }
+    } else { // `min > max` case
+      if (value > upper) {
+        0L
+      } else if (value <= lower) {
+        numBucket + 1L
+      } else {
+        (numBucket.toDouble * (upper - value) / (upper - lower)).toLong + 1L
+      }
+    }
+  }
+}
+
+/**
+ * Returns the bucket number into which the value of this expression would fall
+ * after being evaluated. Note that the input arguments must satisfy the conditions listed
+ * below; otherwise, the method returns null.
+ *  - `numBucket` must be greater than zero and less than Long.MaxValue
+ *  - `value`, `min`, and `max` cannot be NaN
+ *  - `min` cannot equal `max`
+ *  - `min` and `max` must be finite
+ *
+ * Note: If `minValue` > `maxValue`, the return value is as follows:
+ *  - if `value` > `minValue`, it returns 0.
+ *  - if `value` <= `maxValue`, it returns `numBucket` + 1.
+ *  - otherwise, it returns (`numBucket` * (`minValue` - `value`) / (`minValue` - `maxValue`)) + 1.
+ *
+ * @param value is the expression to compute a bucket number in the histogram
+ * @param minValue is the minimum value of the histogram
+ * @param maxValue is the maximum value of the histogram
+ * @param numBucket is the number of buckets
+ */
+@ExpressionDescription(
+  usage = """
+    _FUNC_(value, min_value, max_value, num_bucket) - Returns the bucket number to which
+      `value` would be assigned in an equiwidth histogram with `num_bucket` buckets,
+      in the range `min_value` to `max_value`.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(5.3, 0.2, 10.6, 5);
+       3
+      > SELECT _FUNC_(-2.1, 1.3, 3.4, 3);
+       0
+      > SELECT _FUNC_(8.1, 0.0, 5.7, 4);
+       5
+      > SELECT _FUNC_(-0.9, 5.2, 0.5, 2);
+       3
+  """,
+  since = "3.1.0")
+case class WidthBucket(
+    value: Expression,
+    minValue: Expression,
+    maxValue: Expression,
+    numBucket: Expression)
+  extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant {
+
+  override def children: Seq[Expression] = Seq(value, minValue, maxValue, numBucket)
+  override def inputTypes: Seq[AbstractDataType] = Seq(DoubleType, DoubleType, DoubleType, LongType)
+  override def dataType: DataType = LongType
+  override def nullable: Boolean = true
+
+  override protected def nullSafeEval(input: Any, min: Any, max: Any, numBucket: Any): Any = {
+    WidthBucket.computeBucketNumber(
+      input.asInstanceOf[Double],
+      min.asInstanceOf[Double],
+      max.asInstanceOf[Double],
+      numBucket.asInstanceOf[Long])
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    defineCodeGen(ctx, ev, (input, min, max, numBucket) =>
+      "org.apache.spark.sql.catalyst.expressions.WidthBucket" +
+        s".computeBucketNumber($input, $min, $max, $numBucket)")
+  }
+}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index d245aa5..1a9d5bb 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -1,6 +1,6 @@
 <!-- Automatically generated by ExpressionsSchemaSuite -->
 ## Summary
-  - Number of queries: 337
+  - Number of queries: 338
   - Number of expressions that missing example: 34
   - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch
 ## Schema of Built-in Functions
@@ -291,6 +291,7 @@
 | org.apache.spark.sql.catalyst.expressions.Uuid | uuid | SELECT uuid() | struct<uuid():string> |
 | org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct<weekday(CAST(2009-07-30 AS DATE)):int> |
 | org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct<weekofyear(CAST(2008-02-20 AS DATE)):int> |
+| org.apache.spark.sql.catalyst.expressions.WidthBucket | width_bucket | SELECT width_bucket(5.3, 0.2, 10.6, 5) | struct<widthbucket(CAST(5.3 AS DOUBLE), CAST(0.2 AS DOUBLE), CAST(10.6 AS DOUBLE), CAST(5 AS BIGINT)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct<xxhash64(Spark, array(123), 2):bigint> |
 | org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct<year(CAST(2016-07-30 AS DATE)):int> |
 | org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) | struct<zip_with(array(1, 2, 3), array(a, b, c), lambdafunction(named_struct(y, namedlambdavariable(), x, namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array<struct<y:string,x:int>>> |
diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
index 20bf0eb..c296fa5 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql
@@ -81,3 +81,17 @@ select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11);
 -- pmod
 select pmod(-7, 2), pmod(0, 2), pmod(7, 0), pmod(7, null), pmod(null, 2), pmod(null, null);
 select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as smallint), cast(0 as smallint));
+
+-- width_bucket
+select width_bucket(5.35, 0.024, 10.06, 5);
+select width_bucket(5.35, 0.024, 10.06, 3 + 2);
+select width_bucket('5.35', '0.024', '10.06', '5');
+select width_bucket(5.35, 0.024, 10.06, 2.5);
+select width_bucket(5.35, 0.024, 10.06, 0.5);
+select width_bucket(null, 0.024, 10.06, 5);
+select width_bucket(5.35, null, 10.06, 5);
+select width_bucket(5.35, 0.024, null, -5);
+select width_bucket(5.35, 0.024, 10.06, null);
+select width_bucket(5.35, 0.024, 10.06, -5);
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L); -- long max value
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L - 1);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql
index dbdb2ca..53f2aa4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql
@@ -754,22 +754,22 @@ DROP TABLE ceil_floor_round;
 -- 	round((2.5 * 10 ^ i)::numeric, -i)
 -- FROM generate_series(-5,5) AS t(i);
 
--- [SPARK-21117] Built-in SQL Function Support - WIDTH_BUCKET
 -- Testing for width_bucket(). For convenience, we test both the
 -- numeric and float8 versions of the function in this file.
 
 -- errors
--- SELECT width_bucket(5.0, 3.0, 4.0, 0);
--- SELECT width_bucket(5.0, 3.0, 4.0, -5);
--- SELECT width_bucket(3.5, 3.0, 3.0, 888);
--- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0);
--- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5);
--- SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888);
--- SELECT width_bucket('NaN', 3.0, 4.0, 888);
--- SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888);
+SELECT width_bucket(5.0, 3.0, 4.0, 0);
+SELECT width_bucket(5.0, 3.0, 4.0, -5);
+SELECT width_bucket(3.5, 3.0, 3.0, 888);
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), 0);
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), -5);
+SELECT width_bucket(double(3.5), double(3.0), double(3.0), 888);
+SELECT width_bucket('NaN', 3.0, 4.0, 888);
+SELECT width_bucket(double(0), 'NaN', double(4.0), 888);
 
 -- normal operation
 -- CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8);
+CREATE TABLE width_bucket_test (operand_num decimal(30,15), operand_f8 double) USING parquet;
 
 -- COPY width_bucket_test (operand_num) FROM stdin;
 -- -5.2
@@ -795,28 +795,50 @@ DROP TABLE ceil_floor_round;
 
 -- UPDATE width_bucket_test SET operand_f8 = operand_num::float8;
 
--- SELECT
---     operand_num,
---     width_bucket(operand_num, 0, 10, 5) AS wb_1,
---     width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
---     width_bucket(operand_num, 10, 0, 5) AS wb_2,
---     width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
---     width_bucket(operand_num, 2, 8, 4) AS wb_3,
---     width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
---     width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
---     width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
---     width_bucket(operand_num, -25, 25, 10) AS wb_5,
---     width_bucket(operand_f8, -25, 25, 10) AS wb_5f
---     FROM width_bucket_test;
+INSERT INTO width_bucket_test VALUES
+    (-5.2, -5.2),
+    (-0.0000000001, -0.0000000001),
+    (0.000000000001, 0.000000000001),
+    (1, 1),
+    (1.99999999999999, 1.99999999999999),
+    (2, 2),
+    (2.00000000000001, 2.00000000000001),
+    (3, 3),
+    (4, 4),
+    (4.5, 4.5),
+    (5, 5),
+    (5.5, 5.5),
+    (6, 6),
+    (7, 7),
+    (8, 8),
+    (9, 9),
+    (9.99999999999999, 9.99999999999999),
+    (10, 10),
+    (10.0000000000001, 10.0000000000001);
+
+SELECT
+    operand_num,
+    width_bucket(operand_num, 0, 10, 5) AS wb_1,
+    width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
+    width_bucket(operand_num, 10, 0, 5) AS wb_2,
+    width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
+    width_bucket(operand_num, 2, 8, 4) AS wb_3,
+    width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
+    width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
+    width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
+    width_bucket(operand_num, -25, 25, 10) AS wb_5,
+    width_bucket(operand_f8, -25, 25, 10) AS wb_5f
+    FROM width_bucket_test
+    ORDER BY operand_num ASC;
 
 -- for float8 only, check positive and negative infinity: we require
 -- finite bucket bounds, but allow an infinite operand
--- SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error
--- SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error
--- SELECT width_bucket('Infinity'::float8, 1, 10, 10),
---        width_bucket('-Infinity'::float8, 1, 10, 10);
+SELECT width_bucket(double(0.0), double('Infinity'), 5, 10); -- error
+SELECT width_bucket(double(0.0), 5, double('-Infinity'), 20); -- error
+SELECT width_bucket(double('Infinity'), 1, 10, 10),
+       width_bucket(double('-Infinity'), 1, 10, 10);
 
--- DROP TABLE width_bucket_test;
+DROP TABLE width_bucket_test;
 
 -- [SPARK-28137] Missing Data Type Formatting Functions: TO_CHAR
 -- TO_CHAR()
diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
index 9accc57..fc58bc7 100644
--- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 57
+-- Number of queries: 69
 
 
 -- !query
@@ -456,3 +456,99 @@ select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as smallint)
 struct<pmod(CAST(3.13 AS DECIMAL(10,0)), CAST(0 AS DECIMAL(10,0))):decimal(10,0),pmod(CAST(2 AS SMALLINT), CAST(0 AS SMALLINT)):smallint>
 -- !query output
 NULL	NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+3
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 3 + 2)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST((3 + 2) AS BIGINT)):bigint>
+-- !query output
+3
+
+
+-- !query
+select width_bucket('5.35', '0.024', '10.06', '5')
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+3
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 2.5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(2.5 AS BIGINT)):bigint>
+-- !query output
+2
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 0.5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(0.5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(null, 0.024, 10.06, 5)
+-- !query schema
+struct<widthbucket(CAST(NULL AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, null, 10.06, 5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, null, -5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, null)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(NULL AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, -5)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), 9223372036854775807):bigint>
+-- !query output
+NULL
+
+
+-- !query
+select width_bucket(5.35, 0.024, 10.06, 9223372036854775807L - 1)
+-- !query schema
+struct<widthbucket(CAST(5.35 AS DOUBLE), CAST(0.024 AS DOUBLE), CAST(10.06 AS DOUBLE), (9223372036854775807 - CAST(1 AS BIGINT))):bigint>
+-- !query output
+4894746858139549697
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
index 7b7aeb4..3cade4f 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 577
+-- Number of queries: 592
 
 
 -- !query
@@ -4424,6 +4424,177 @@ struct<>
 
 
 -- !query
+SELECT width_bucket(5.0, 3.0, 4.0, 0)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(0 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(5.0, 3.0, 4.0, -5)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(3.5, 3.0, 3.0, 888)
+-- !query schema
+struct<widthbucket(CAST(3.5 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), 0)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(0 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(5.0), double(3.0), double(4.0), -5)
+-- !query schema
+struct<widthbucket(CAST(5.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(-5 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(3.5), double(3.0), double(3.0), 888)
+-- !query schema
+struct<widthbucket(CAST(3.5 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket('NaN', 3.0, 4.0, 888)
+-- !query schema
+struct<widthbucket(CAST(NaN AS DOUBLE), CAST(3.0 AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(0), 'NaN', double(4.0), 888)
+-- !query schema
+struct<widthbucket(CAST(0 AS DOUBLE), CAST(NaN AS DOUBLE), CAST(4.0 AS DOUBLE), CAST(888 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+CREATE TABLE width_bucket_test (operand_num decimal(30,15), operand_f8 double) USING parquet
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+INSERT INTO width_bucket_test VALUES
+    (-5.2, -5.2),
+    (-0.0000000001, -0.0000000001),
+    (0.000000000001, 0.000000000001),
+    (1, 1),
+    (1.99999999999999, 1.99999999999999),
+    (2, 2),
+    (2.00000000000001, 2.00000000000001),
+    (3, 3),
+    (4, 4),
+    (4.5, 4.5),
+    (5, 5),
+    (5.5, 5.5),
+    (6, 6),
+    (7, 7),
+    (8, 8),
+    (9, 9),
+    (9.99999999999999, 9.99999999999999),
+    (10, 10),
+    (10.0000000000001, 10.0000000000001)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT
+    operand_num,
+    width_bucket(operand_num, 0, 10, 5) AS wb_1,
+    width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
+    width_bucket(operand_num, 10, 0, 5) AS wb_2,
+    width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
+    width_bucket(operand_num, 2, 8, 4) AS wb_3,
+    width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
+    width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
+    width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
+    width_bucket(operand_num, -25, 25, 10) AS wb_5,
+    width_bucket(operand_f8, -25, 25, 10) AS wb_5f
+    FROM width_bucket_test
+    ORDER BY operand_num ASC
+-- !query schema
+struct<operand_num:decimal(30,15),wb_1:bigint,wb_1f:bigint,wb_2:bigint,wb_2f:bigint,wb_3:bigint,wb_3f:bigint,wb_4:bigint,wb_4f:bigint,wb_5:bigint,wb_5f:bigint>
+-- !query output
+-5.200000000000000	0	0	6	6	0	0	0	0	4	4
+-0.000000000100000	0	0	6	6	0	0	0	0	5	5
+0.000000000001000	1	1	5	5	0	0	0	0	6	6
+1.000000000000000	1	1	5	5	0	0	0	0	6	6
+1.999999999999990	1	1	5	5	0	0	0	0	6	6
+2.000000000000000	2	2	5	5	1	1	0	0	6	6
+2.000000000000010	2	2	4	4	1	1	0	0	6	6
+3.000000000000000	2	2	4	4	1	1	0	0	6	6
+4.000000000000000	3	3	4	4	2	2	0	0	6	6
+4.500000000000000	3	3	3	3	2	2	0	0	6	6
+5.000000000000000	3	3	3	3	3	3	1	1	7	7
+5.500000000000000	3	3	3	3	3	3	21	21	7	7
+6.000000000000000	4	4	3	3	3	3	21	21	7	7
+7.000000000000000	4	4	2	2	4	4	21	21	7	7
+8.000000000000000	5	5	2	2	5	5	21	21	7	7
+9.000000000000000	5	5	1	1	5	5	21	21	7	7
+9.999999999999990	5	5	1	1	5	5	21	21	7	7
+10.000000000000000	6	6	1	1	5	5	21	21	8	8
+10.000000000000100	6	6	0	0	5	5	21	21	8	8
+
+
+-- !query
+SELECT width_bucket(double(0.0), double('Infinity'), 5, 10)
+-- !query schema
+struct<widthbucket(CAST(0.0 AS DOUBLE), CAST(Infinity AS DOUBLE), CAST(5 AS DOUBLE), CAST(10 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double(0.0), 5, double('-Infinity'), 20)
+-- !query schema
+struct<widthbucket(CAST(0.0 AS DOUBLE), CAST(5 AS DOUBLE), CAST(-Infinity AS DOUBLE), CAST(20 AS BIGINT)):bigint>
+-- !query output
+NULL
+
+
+-- !query
+SELECT width_bucket(double('Infinity'), 1, 10, 10),
+       width_bucket(double('-Infinity'), 1, 10, 10)
+-- !query schema
+struct<widthbucket(CAST(Infinity AS DOUBLE), CAST(1 AS DOUBLE), CAST(10 AS DOUBLE), CAST(10 AS BIGINT)):bigint,widthbucket(CAST(-Infinity AS DOUBLE), CAST(1 AS DOUBLE), CAST(10 AS DOUBLE), CAST(10 AS BIGINT)):bigint>
+-- !query output
+11	0
+
+
+-- !query
+DROP TABLE width_bucket_test
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
 CREATE TABLE num_input_test (n1 decimal(38, 18)) USING parquet
 -- !query schema
 struct<>

