You are viewing a plain-text version of this content; the canonical link to the original was not preserved in this copy.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/17 15:16:06 UTC

[arrow-datafusion] branch master updated: Update sqlparser requirement from 0.26 to 0.27 (#4226)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new a0581dc61 Update sqlparser requirement from 0.26 to 0.27 (#4226)
a0581dc61 is described below

commit a0581dc61beef8400157177134956e0875934a14
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Thu Nov 17 10:16:01 2022 -0500

    Update sqlparser requirement from 0.26 to 0.27 (#4226)
    
    * Update sqlparser requirement from 0.26 to 0.27
    
    Updates the requirements on [sqlparser](https://github.com/sqlparser-rs/sqlparser-rs) to permit the latest version.
    - [Release notes](https://github.com/sqlparser-rs/sqlparser-rs/releases)
    - [Changelog](https://github.com/sqlparser-rs/sqlparser-rs/blob/main/CHANGELOG.md)
    - [Commits](https://github.com/sqlparser-rs/sqlparser-rs/compare/v0.26.0...v0.27.0)
    
    ---
    updated-dependencies:
    - dependency-name: sqlparser
      dependency-type: direct:production
    ...
    
    Signed-off-by: dependabot[bot] <su...@github.com>
    
    * Update to sqlparser 0.27
    
    * Update datafusion-cli lock
    
    * fix up some tests
    
    * re-implement handling for array_agg
    
    * add test
    
    Signed-off-by: dependabot[bot] <su...@github.com>
    Co-authored-by: dependabot[bot] <49...@users.noreply.github.com>
---
 datafusion-cli/Cargo.lock               |  4 +-
 datafusion-cli/src/object_storage.rs    |  2 +-
 datafusion/common/Cargo.toml            |  2 +-
 datafusion/core/Cargo.toml              |  2 +-
 datafusion/core/tests/sql/aggregates.rs | 32 +++++++++++++
 datafusion/core/tests/sql/timestamp.rs  |  4 +-
 datafusion/expr/Cargo.toml              |  2 +-
 datafusion/sql/Cargo.toml               |  2 +-
 datafusion/sql/src/planner.rs           | 82 +++++++++++++++++++++++++++++----
 9 files changed, 114 insertions(+), 18 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index b42c20390..efbbf762b 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -2207,9 +2207,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
 
 [[package]]
 name = "sqlparser"
-version = "0.26.0"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb"
+checksum = "aba319938d4bfe250a769ac88278b629701024fe16f34257f9563bc628081970"
 dependencies = [
  "log",
 ]
diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs
index 0982f3ff9..64c48840e 100644
--- a/datafusion-cli/src/object_storage.rs
+++ b/datafusion-cli/src/object_storage.rs
@@ -145,7 +145,7 @@ mod tests {
         let res = provider.get_by_url(&url);
         let msg = match res {
             Err(e) => format!("{}", e),
-            Ok(_) => "".to_string()
+            Ok(_) => "".to_string(),
         };
         assert_eq!("".to_string(), msg); // Fail with error message
         env::remove_var("AWS_REGION");
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 69d01b301..7284f3f37 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -46,4 +46,4 @@ cranelift-module = { version = "0.89.0", optional = true }
 object_store = { version = "0.5.0", default-features = false, optional = true }
 parquet = { version = "27.0.0", default-features = false, optional = true }
 pyo3 = { version = "0.17.1", optional = true }
-sqlparser = "0.26"
+sqlparser = "0.27"
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index efd28ee9c..ae073c7c0 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -89,7 +89,7 @@ pyo3 = { version = "0.17.1", optional = true }
 rand = "0.8"
 rayon = { version = "1.5", optional = true }
 smallvec = { version = "1.6", features = ["union"] }
-sqlparser = "0.26"
+sqlparser = "0.27"
 tempfile = "3"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
 tokio-stream = "0.1"
diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
index b4c25fd9e..91f75c0eb 100644
--- a/datafusion/core/tests/sql/aggregates.rs
+++ b/datafusion/core/tests/sql/aggregates.rs
@@ -1318,6 +1318,38 @@ async fn csv_query_array_agg_with_overflow() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn csv_query_array_agg_unsupported() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_csv(&ctx).await?;
+
+    let results = plan_and_collect(
+        &ctx,
+        "SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100",
+    )
+    .await
+    .unwrap_err();
+
+    assert_eq!(
+        results.to_string(),
+        "This feature is not implemented: ORDER BY not supported in ARRAY_AGG: c1"
+    );
+
+    let results = plan_and_collect(
+        &ctx,
+        "SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100",
+    )
+    .await
+    .unwrap_err();
+
+    assert_eq!(
+        results.to_string(),
+        "This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1"
+    );
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn csv_query_array_cube_agg_with_overflow() -> Result<()> {
     let ctx = SessionContext::new();
diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs
index dec04f653..cb70ab2d0 100644
--- a/datafusion/core/tests/sql/timestamp.rs
+++ b/datafusion/core/tests/sql/timestamp.rs
@@ -1579,7 +1579,7 @@ async fn test_cast_to_time_with_time_zone_should_not_work() -> Result<()> {
 
     assert_eq!(
         results.to_string(),
-        "This feature is not implemented: Unsupported SQL type Time(WithTimeZone)"
+        "This feature is not implemented: Unsupported SQL type Time(None, WithTimeZone)"
     );
 
     Ok(())
@@ -1612,7 +1612,7 @@ async fn test_cast_to_timetz_should_not_work() -> Result<()> {
 
     assert_eq!(
         results.to_string(),
-        "This feature is not implemented: Unsupported SQL type Time(Tz)"
+        "This feature is not implemented: Unsupported SQL type Time(None, Tz)"
     );
     Ok(())
 }
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 6e6fd13d5..4527b2bd3 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -39,4 +39,4 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"]
 arrow = { version = "27.0.0", default-features = false }
 datafusion-common = { path = "../common", version = "14.0.0" }
 log = "^0.4"
-sqlparser = "0.26"
+sqlparser = "0.27"
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
index 2c712ba86..ef4217021 100644
--- a/datafusion/sql/Cargo.toml
+++ b/datafusion/sql/Cargo.toml
@@ -40,4 +40,4 @@ unicode_expressions = []
 arrow = { version = "27.0.0", default-features = false }
 datafusion-common = { path = "../common", version = "14.0.0" }
 datafusion-expr = { path = "../expr", version = "14.0.0" }
-sqlparser = "0.26"
+sqlparser = "0.27"
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index d21252744..f3cd1674d 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -23,8 +23,8 @@ use std::sync::Arc;
 use std::{convert::TryInto, vec};
 
 use arrow::datatypes::*;
-use sqlparser::ast::ExactNumberInfo;
 use sqlparser::ast::TimezoneInfo;
+use sqlparser::ast::{ArrayAgg, ExactNumberInfo, SetQuantifier};
 use sqlparser::ast::{
     BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg,
     FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName,
@@ -460,8 +460,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 op,
                 left,
                 right,
-                all,
+                set_quantifier,
             } => {
+                let all = match set_quantifier {
+                    SetQuantifier::All => true,
+                    SetQuantifier::Distinct | SetQuantifier::None => false,
+                };
+
                 let left_plan =
                     self.set_expr_to_plan(*left, None, ctes, outer_query_schema)?;
                 let right_plan =
@@ -2320,6 +2325,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
             SQLExpr::Subquery(subquery) => self.parse_scalar_subquery(&subquery, schema, ctes),
 
+            SQLExpr::ArrayAgg(array_agg) => self.parse_array_agg(array_agg, schema, ctes),
+
             _ => Err(DataFusionError::NotImplemented(format!(
                 "Unsupported ast node in sqltorel: {:?}",
                 sql
@@ -2382,6 +2389,53 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         }))
     }
 
+    fn parse_array_agg(
+        &self,
+        array_agg: ArrayAgg,
+        input_schema: &DFSchema,
+        ctes: &mut HashMap<String, LogicalPlan>,
+    ) -> Result<Expr> {
+        // Some dialects have special syntax for array_agg. DataFusion only supports it like a function.
+        let ArrayAgg {
+            distinct,
+            expr,
+            order_by,
+            limit,
+            within_group,
+        } = array_agg;
+
+        if let Some(order_by) = order_by {
+            return Err(DataFusionError::NotImplemented(format!(
+                "ORDER BY not supported in ARRAY_AGG: {}",
+                order_by
+            )));
+        }
+
+        if let Some(limit) = limit {
+            return Err(DataFusionError::NotImplemented(format!(
+                "LIMIT not supported in ARRAY_AGG: {}",
+                limit
+            )));
+        }
+
+        if within_group {
+            return Err(DataFusionError::NotImplemented(
+                "WITHIN GROUP not supported in ARRAY_AGG".to_string(),
+            ));
+        }
+
+        let args = vec![self.sql_expr_to_logical_expr(*expr, input_schema, ctes)?];
+        // next, aggregate built-ins
+        let fun = AggregateFunction::ArrayAgg;
+
+        Ok(Expr::AggregateFunction {
+            fun,
+            distinct,
+            args,
+            filter: None,
+        })
+    }
+
     fn function_args_to_expr(
         &self,
         args: Vec<FunctionArg>,
@@ -2532,6 +2586,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 Value::SingleQuotedString(s) => s.to_string(),
                 Value::Number(_, _) | Value::Boolean(_) => v.to_string(),
                 Value::DoubleQuotedString(_)
+                | Value::UnQuotedString(_)
                 | Value::EscapedStringLiteral(_)
                 | Value::NationalStringLiteral(_)
                 | Value::HexStringLiteral(_)
@@ -2756,13 +2811,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
     fn convert_data_type(&self, sql_type: &SQLDataType) -> Result<DataType> {
         match sql_type {
-            SQLDataType::Array(inner_sql_type) => {
+            SQLDataType::Array(Some(inner_sql_type)) => {
                 let data_type = self.convert_simple_data_type(inner_sql_type)?;
 
                 Ok(DataType::List(Box::new(Field::new(
                     "field", data_type, true,
                 ))))
             }
+            SQLDataType::Array(None) => Err(DataFusionError::NotImplemented(
+                "Arrays with unspecified type is not supported".to_string(),
+            )),
             other => self.convert_simple_data_type(other),
         }
     }
@@ -2786,7 +2844,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             | SQLDataType::Varchar(_)
             | SQLDataType::Text
             | SQLDataType::String => Ok(DataType::Utf8),
-            SQLDataType::Timestamp(tz_info) => {
+            SQLDataType::Timestamp(None, tz_info) => {
                 let tz = if matches!(tz_info, TimezoneInfo::Tz)
                     || matches!(tz_info, TimezoneInfo::WithTimeZone)
                 {
@@ -2816,7 +2874,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz))
             }
             SQLDataType::Date => Ok(DataType::Date32),
-            SQLDataType::Time(tz_info) => {
+            SQLDataType::Time(None, tz_info) => {
                 if matches!(tz_info, TimezoneInfo::None)
                     || matches!(tz_info, TimezoneInfo::WithoutTimeZone)
                 {
@@ -2829,7 +2887,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                     )))
                 }
             }
-            SQLDataType::Decimal(exact_number_info) => {
+            SQLDataType::Numeric(exact_number_info)
+            |SQLDataType::Decimal(exact_number_info) => {
                 let (precision, scale) = match *exact_number_info {
                     ExactNumberInfo::None => (None, None),
                     ExactNumberInfo::Precision(precision) => (Some(precision), None),
@@ -2848,10 +2907,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             | SQLDataType::Binary(_)
             | SQLDataType::Varbinary(_)
             | SQLDataType::Blob(_)
-            | SQLDataType::Datetime
+            | SQLDataType::Datetime(_)
             | SQLDataType::Interval
             | SQLDataType::Regclass
-            | SQLDataType::Custom(_)
+            | SQLDataType::Custom(_, _)
             | SQLDataType::Array(_)
             | SQLDataType::Enum(_)
             | SQLDataType::Set(_)
@@ -2861,7 +2920,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             | SQLDataType::CharacterVarying(_)
             | SQLDataType::CharVarying(_)
             | SQLDataType::CharacterLargeObject(_)
-            | SQLDataType::CharLargeObject(_)
+                | SQLDataType::CharLargeObject(_)
+            // precision is not supported
+                | SQLDataType::Timestamp(Some(_), _)
+            // precision is not supported
+                | SQLDataType::Time(Some(_), _)
+                | SQLDataType::Dec(_)
             | SQLDataType::Clob(_) => Err(DataFusionError::NotImplemented(format!(
                 "Unsupported SQL type {:?}",
                 sql_type