You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/17 15:16:06 UTC
[arrow-datafusion] branch master updated: Update sqlparser requirement from 0.26 to 0.27 (#4226)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new a0581dc61 Update sqlparser requirement from 0.26 to 0.27 (#4226)
a0581dc61 is described below
commit a0581dc61beef8400157177134956e0875934a14
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Thu Nov 17 10:16:01 2022 -0500
Update sqlparser requirement from 0.26 to 0.27 (#4226)
* Update sqlparser requirement from 0.26 to 0.27
Updates the requirements on [sqlparser](https://github.com/sqlparser-rs/sqlparser-rs) to permit the latest version.
- [Release notes](https://github.com/sqlparser-rs/sqlparser-rs/releases)
- [Changelog](https://github.com/sqlparser-rs/sqlparser-rs/blob/main/CHANGELOG.md)
- [Commits](https://github.com/sqlparser-rs/sqlparser-rs/compare/v0.26.0...v0.27.0)
---
updated-dependencies:
- dependency-name: sqlparser
dependency-type: direct:production
...
Signed-off-by: dependabot[bot] <su...@github.com>
* Update to sqlparser 0.27
* Update datafusion-cli lock
* fix up some tests
* re-implement handling for array_agg
* add test
Signed-off-by: dependabot[bot] <su...@github.com>
Co-authored-by: dependabot[bot] <49...@users.noreply.github.com>
---
datafusion-cli/Cargo.lock | 4 +-
datafusion-cli/src/object_storage.rs | 2 +-
datafusion/common/Cargo.toml | 2 +-
datafusion/core/Cargo.toml | 2 +-
datafusion/core/tests/sql/aggregates.rs | 32 +++++++++++++
datafusion/core/tests/sql/timestamp.rs | 4 +-
datafusion/expr/Cargo.toml | 2 +-
datafusion/sql/Cargo.toml | 2 +-
datafusion/sql/src/planner.rs | 82 +++++++++++++++++++++++++++++----
9 files changed, 114 insertions(+), 18 deletions(-)
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index b42c20390..efbbf762b 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -2207,9 +2207,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "sqlparser"
-version = "0.26.0"
+version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb"
+checksum = "aba319938d4bfe250a769ac88278b629701024fe16f34257f9563bc628081970"
dependencies = [
"log",
]
diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs
index 0982f3ff9..64c48840e 100644
--- a/datafusion-cli/src/object_storage.rs
+++ b/datafusion-cli/src/object_storage.rs
@@ -145,7 +145,7 @@ mod tests {
let res = provider.get_by_url(&url);
let msg = match res {
Err(e) => format!("{}", e),
- Ok(_) => "".to_string()
+ Ok(_) => "".to_string(),
};
assert_eq!("".to_string(), msg); // Fail with error message
env::remove_var("AWS_REGION");
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 69d01b301..7284f3f37 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -46,4 +46,4 @@ cranelift-module = { version = "0.89.0", optional = true }
object_store = { version = "0.5.0", default-features = false, optional = true }
parquet = { version = "27.0.0", default-features = false, optional = true }
pyo3 = { version = "0.17.1", optional = true }
-sqlparser = "0.26"
+sqlparser = "0.27"
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index efd28ee9c..ae073c7c0 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -89,7 +89,7 @@ pyo3 = { version = "0.17.1", optional = true }
rand = "0.8"
rayon = { version = "1.5", optional = true }
smallvec = { version = "1.6", features = ["union"] }
-sqlparser = "0.26"
+sqlparser = "0.27"
tempfile = "3"
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio-stream = "0.1"
diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
index b4c25fd9e..91f75c0eb 100644
--- a/datafusion/core/tests/sql/aggregates.rs
+++ b/datafusion/core/tests/sql/aggregates.rs
@@ -1318,6 +1318,38 @@ async fn csv_query_array_agg_with_overflow() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn csv_query_array_agg_unsupported() -> Result<()> {
+ let ctx = SessionContext::new();
+ register_aggregate_csv(&ctx).await?;
+
+ let results = plan_and_collect(
+ &ctx,
+ "SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100",
+ )
+ .await
+ .unwrap_err();
+
+ assert_eq!(
+ results.to_string(),
+ "This feature is not implemented: ORDER BY not supported in ARRAY_AGG: c1"
+ );
+
+ let results = plan_and_collect(
+ &ctx,
+ "SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100",
+ )
+ .await
+ .unwrap_err();
+
+ assert_eq!(
+ results.to_string(),
+ "This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1"
+ );
+
+ Ok(())
+}
+
#[tokio::test]
async fn csv_query_array_cube_agg_with_overflow() -> Result<()> {
let ctx = SessionContext::new();
diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs
index dec04f653..cb70ab2d0 100644
--- a/datafusion/core/tests/sql/timestamp.rs
+++ b/datafusion/core/tests/sql/timestamp.rs
@@ -1579,7 +1579,7 @@ async fn test_cast_to_time_with_time_zone_should_not_work() -> Result<()> {
assert_eq!(
results.to_string(),
- "This feature is not implemented: Unsupported SQL type Time(WithTimeZone)"
+ "This feature is not implemented: Unsupported SQL type Time(None, WithTimeZone)"
);
Ok(())
@@ -1612,7 +1612,7 @@ async fn test_cast_to_timetz_should_not_work() -> Result<()> {
assert_eq!(
results.to_string(),
- "This feature is not implemented: Unsupported SQL type Time(Tz)"
+ "This feature is not implemented: Unsupported SQL type Time(None, Tz)"
);
Ok(())
}
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 6e6fd13d5..4527b2bd3 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -39,4 +39,4 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"]
arrow = { version = "27.0.0", default-features = false }
datafusion-common = { path = "../common", version = "14.0.0" }
log = "^0.4"
-sqlparser = "0.26"
+sqlparser = "0.27"
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
index 2c712ba86..ef4217021 100644
--- a/datafusion/sql/Cargo.toml
+++ b/datafusion/sql/Cargo.toml
@@ -40,4 +40,4 @@ unicode_expressions = []
arrow = { version = "27.0.0", default-features = false }
datafusion-common = { path = "../common", version = "14.0.0" }
datafusion-expr = { path = "../expr", version = "14.0.0" }
-sqlparser = "0.26"
+sqlparser = "0.27"
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index d21252744..f3cd1674d 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -23,8 +23,8 @@ use std::sync::Arc;
use std::{convert::TryInto, vec};
use arrow::datatypes::*;
-use sqlparser::ast::ExactNumberInfo;
use sqlparser::ast::TimezoneInfo;
+use sqlparser::ast::{ArrayAgg, ExactNumberInfo, SetQuantifier};
use sqlparser::ast::{
BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg,
FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName,
@@ -460,8 +460,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
op,
left,
right,
- all,
+ set_quantifier,
} => {
+ let all = match set_quantifier {
+ SetQuantifier::All => true,
+ SetQuantifier::Distinct | SetQuantifier::None => false,
+ };
+
let left_plan =
self.set_expr_to_plan(*left, None, ctes, outer_query_schema)?;
let right_plan =
@@ -2320,6 +2325,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLExpr::Subquery(subquery) => self.parse_scalar_subquery(&subquery, schema, ctes),
+ SQLExpr::ArrayAgg(array_agg) => self.parse_array_agg(array_agg, schema, ctes),
+
_ => Err(DataFusionError::NotImplemented(format!(
"Unsupported ast node in sqltorel: {:?}",
sql
@@ -2382,6 +2389,53 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}))
}
+ fn parse_array_agg(
+ &self,
+ array_agg: ArrayAgg,
+ input_schema: &DFSchema,
+ ctes: &mut HashMap<String, LogicalPlan>,
+ ) -> Result<Expr> {
+ // Some dialects have special syntax for array_agg. DataFusion only supports it like a function.
+ let ArrayAgg {
+ distinct,
+ expr,
+ order_by,
+ limit,
+ within_group,
+ } = array_agg;
+
+ if let Some(order_by) = order_by {
+ return Err(DataFusionError::NotImplemented(format!(
+ "ORDER BY not supported in ARRAY_AGG: {}",
+ order_by
+ )));
+ }
+
+ if let Some(limit) = limit {
+ return Err(DataFusionError::NotImplemented(format!(
+ "LIMIT not supported in ARRAY_AGG: {}",
+ limit
+ )));
+ }
+
+ if within_group {
+ return Err(DataFusionError::NotImplemented(
+ "WITHIN GROUP not supported in ARRAY_AGG".to_string(),
+ ));
+ }
+
+ let args = vec![self.sql_expr_to_logical_expr(*expr, input_schema, ctes)?];
+ // next, aggregate built-ins
+ let fun = AggregateFunction::ArrayAgg;
+
+ Ok(Expr::AggregateFunction {
+ fun,
+ distinct,
+ args,
+ filter: None,
+ })
+ }
+
fn function_args_to_expr(
&self,
args: Vec<FunctionArg>,
@@ -2532,6 +2586,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Value::SingleQuotedString(s) => s.to_string(),
Value::Number(_, _) | Value::Boolean(_) => v.to_string(),
Value::DoubleQuotedString(_)
+ | Value::UnQuotedString(_)
| Value::EscapedStringLiteral(_)
| Value::NationalStringLiteral(_)
| Value::HexStringLiteral(_)
@@ -2756,13 +2811,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
fn convert_data_type(&self, sql_type: &SQLDataType) -> Result<DataType> {
match sql_type {
- SQLDataType::Array(inner_sql_type) => {
+ SQLDataType::Array(Some(inner_sql_type)) => {
let data_type = self.convert_simple_data_type(inner_sql_type)?;
Ok(DataType::List(Box::new(Field::new(
"field", data_type, true,
))))
}
+ SQLDataType::Array(None) => Err(DataFusionError::NotImplemented(
+ "Arrays with unspecified type is not supported".to_string(),
+ )),
other => self.convert_simple_data_type(other),
}
}
@@ -2786,7 +2844,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::Varchar(_)
| SQLDataType::Text
| SQLDataType::String => Ok(DataType::Utf8),
- SQLDataType::Timestamp(tz_info) => {
+ SQLDataType::Timestamp(None, tz_info) => {
let tz = if matches!(tz_info, TimezoneInfo::Tz)
|| matches!(tz_info, TimezoneInfo::WithTimeZone)
{
@@ -2816,7 +2874,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz))
}
SQLDataType::Date => Ok(DataType::Date32),
- SQLDataType::Time(tz_info) => {
+ SQLDataType::Time(None, tz_info) => {
if matches!(tz_info, TimezoneInfo::None)
|| matches!(tz_info, TimezoneInfo::WithoutTimeZone)
{
@@ -2829,7 +2887,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
)))
}
}
- SQLDataType::Decimal(exact_number_info) => {
+ SQLDataType::Numeric(exact_number_info)
+ |SQLDataType::Decimal(exact_number_info) => {
let (precision, scale) = match *exact_number_info {
ExactNumberInfo::None => (None, None),
ExactNumberInfo::Precision(precision) => (Some(precision), None),
@@ -2848,10 +2907,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::Binary(_)
| SQLDataType::Varbinary(_)
| SQLDataType::Blob(_)
- | SQLDataType::Datetime
+ | SQLDataType::Datetime(_)
| SQLDataType::Interval
| SQLDataType::Regclass
- | SQLDataType::Custom(_)
+ | SQLDataType::Custom(_, _)
| SQLDataType::Array(_)
| SQLDataType::Enum(_)
| SQLDataType::Set(_)
@@ -2861,7 +2920,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::CharacterVarying(_)
| SQLDataType::CharVarying(_)
| SQLDataType::CharacterLargeObject(_)
- | SQLDataType::CharLargeObject(_)
+ | SQLDataType::CharLargeObject(_)
+ // precision is not supported
+ | SQLDataType::Timestamp(Some(_), _)
+ // precision is not supported
+ | SQLDataType::Time(Some(_), _)
+ | SQLDataType::Dec(_)
| SQLDataType::Clob(_) => Err(DataFusionError::NotImplemented(format!(
"Unsupported SQL type {:?}",
sql_type