You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/09/11 00:20:23 UTC

[spark] branch branch-3.0 updated: [SPARK-32840][SQL][3.0] Invalid interval value can happen to be just adhesive with the unit

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 4fdd818  [SPARK-32840][SQL][3.0] Invalid interval value can happen to be just adhesive with the unit
4fdd818 is described below

commit 4fdd818fd9cf017b4f149c28fe944c628addf4f5
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Fri Sep 11 09:16:12 2020 +0900

    [SPARK-32840][SQL][3.0] Invalid interval value can happen to be just adhesive with the unit
    
    THIS PR backports https://github.com/apache/spark/pull/29708 to 3.0
    
    ### What changes were proposed in this pull request?
    In this PR, we add a checker for STRING form interval value ahead for parsing multiple units intervals and fail directly if the interval value contains alphabets to prevent correctness issues like `interval '1 day 2' day`=`3 days`.
    
    ### Why are the changes needed?
    
    fix correctness issue
    
    ### Does this PR introduce _any_ user-facing change?
    
    yes, in spark 3.0.0 `interval '1 day 2' day`=`3 days` but now we fail with ParseException
    ### How was this patch tested?
    
    add a test.
    
    Closes #29716 from yaooqinn/SPARK-32840-30.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 11 ++++++-
 .../test/resources/sql-tests/inputs/interval.sql   |  4 +++
 .../sql-tests/results/ansi/interval.sql.out        | 38 +++++++++++++++++++++-
 .../resources/sql-tests/results/interval.sql.out   | 38 +++++++++++++++++++++-
 4 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index b29fe21..307a72e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2107,7 +2107,16 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
         val kvs = units.indices.map { i =>
           val u = units(i).getText
           val v = if (values(i).STRING() != null) {
-            string(values(i).STRING())
+            val value = string(values(i).STRING())
+            // SPARK-32840: For invalid cases, e.g. INTERVAL '1 day 2' hour,
+            // INTERVAL 'interval 1' day, we need to check ahead before they are concatenated with
+            // units and become valid ones, e.g. '1 day 2 hour'.
+            // Ideally, we only ensure the value parts don't contain any units here.
+            if (value.exists(Character.isLetter)) {
+              throw new ParseException("Can only use numbers in the interval value part for" +
+                s" multiple unit value pairs interval form, but got invalid value: $value", ctx)
+            }
+            value
           } else {
             values(i).getText
           }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 9ddefa5..9ad968e 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -188,3 +188,7 @@ select interval '1.2';
 select interval '- 2';
 select interval '1 day -';
 select interval '1 day 1';
+
+select interval '1 day 2' day;
+select interval 'interval 1' day;
+select interval '-\t 1' day;
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index eb84f3e..5a66db9 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 100
+-- Number of queries: 103
 
 
 -- !query
@@ -1054,3 +1054,39 @@ Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7)
 == SQL ==
 select interval '1 day 1'
 -------^^^
+
+
+-- !query
+select interval '1 day 2' day
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: 1 day 2(line 1, pos 16)
+
+== SQL ==
+select interval '1 day 2' day
+----------------^^^
+
+
+-- !query
+select interval 'interval 1' day
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: interval 1(line 1, pos 16)
+
+== SQL ==
+select interval 'interval 1' day
+----------------^^^
+
+
+-- !query
+select interval '-\t 1' day
+-- !query schema
+struct<INTERVAL '-1 days':interval>
+-- !query output
+-1 days
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index c5dd1e0..baf7f16 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 100
+-- Number of queries: 103
 
 
 -- !query
@@ -1026,3 +1026,39 @@ Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7)
 == SQL ==
 select interval '1 day 1'
 -------^^^
+
+
+-- !query
+select interval '1 day 2' day
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: 1 day 2(line 1, pos 16)
+
+== SQL ==
+select interval '1 day 2' day
+----------------^^^
+
+
+-- !query
+select interval 'interval 1' day
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: interval 1(line 1, pos 16)
+
+== SQL ==
+select interval 'interval 1' day
+----------------^^^
+
+
+-- !query
+select interval '-\t 1' day
+-- !query schema
+struct<INTERVAL '-1 days':interval>
+-- !query output
+-1 days


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org