You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/11 18:43:53 UTC

[arrow-datafusion] branch master updated: Update to sqlparser 0.14 (#1796)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a50dcd  Update to sqlparser 0.14 (#1796)
0a50dcd is described below

commit 0a50dcdfb1f2854e59b17da2d87c106c614b226d
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Fri Feb 11 13:43:49 2022 -0500

    Update to sqlparser 0.14 (#1796)
    
    * Update to sqlparser 0.14
    
    * Update SQL planner for new sqlparser
    
    * Handle errors while parsing indexed fields
    
    * clippy
---
 ballista/rust/client/Cargo.toml |  2 +-
 ballista/rust/core/Cargo.toml   |  2 +-
 datafusion-common/Cargo.toml    |  2 +-
 datafusion-expr/Cargo.toml      |  2 +-
 datafusion/Cargo.toml           |  2 +-
 datafusion/src/sql/planner.rs   | 90 +++++++++++++++++++++++++----------------
 6 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/ballista/rust/client/Cargo.toml b/ballista/rust/client/Cargo.toml
index 1a10cbf..dda7630 100644
--- a/ballista/rust/client/Cargo.toml
+++ b/ballista/rust/client/Cargo.toml
@@ -34,7 +34,7 @@ futures = "0.3"
 log = "0.4"
 tokio = "1.0"
 tempfile = "3"
-sqlparser = "0.13"
+sqlparser = "0.14"
 parking_lot = "0.12"
 
 datafusion = { path = "../../../datafusion", version = "7.0.0" }
diff --git a/ballista/rust/core/Cargo.toml b/ballista/rust/core/Cargo.toml
index 154760c..ec9353c 100644
--- a/ballista/rust/core/Cargo.toml
+++ b/ballista/rust/core/Cargo.toml
@@ -37,7 +37,7 @@ hashbrown = "0.12"
 log = "0.4"
 prost = "0.9"
 serde = {version = "1", features = ["derive"]}
-sqlparser = "0.13"
+sqlparser = "0.14"
 tokio = "1.0"
 tonic = "0.6"
 uuid = { version = "0.8", features = ["v4"] }
diff --git a/datafusion-common/Cargo.toml b/datafusion-common/Cargo.toml
index 02a6426..081dc17 100644
--- a/datafusion-common/Cargo.toml
+++ b/datafusion-common/Cargo.toml
@@ -41,5 +41,5 @@ arrow = { version = "8.0.0", features = ["prettyprint"] }
 parquet = { version = "8.0.0", features = ["arrow"] }
 avro-rs = { version = "0.13", features = ["snappy"], optional = true }
 pyo3 = { version = "0.15", optional = true }
-sqlparser = "0.13"
+sqlparser = "0.14"
 ordered-float = "2.10"
diff --git a/datafusion-expr/Cargo.toml b/datafusion-expr/Cargo.toml
index abf652f..aac7652 100644
--- a/datafusion-expr/Cargo.toml
+++ b/datafusion-expr/Cargo.toml
@@ -37,5 +37,5 @@ path = "src/lib.rs"
 [dependencies]
 datafusion-common = { path = "../datafusion-common", version = "7.0.0" }
 arrow = { version = "8.0.0", features = ["prettyprint"] }
-sqlparser = "0.13"
+sqlparser = "0.14"
 ahash = { version = "0.7", default-features = false }
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index dc51fd5..63029a6 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -58,7 +58,7 @@ ahash = { version = "0.7", default-features = false }
 hashbrown = { version = "0.12", features = ["raw"] }
 arrow = { version = "8.0.0", features = ["prettyprint"] }
 parquet = { version = "8.0.0", features = ["arrow"] }
-sqlparser = "0.13"
+sqlparser = "0.14"
 paste = "^1.0"
 num_cpus = "1.13.0"
 chrono = { version = "0.4", default-features = false }
diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs
index cbbea90..382dbf1 100644
--- a/datafusion/src/sql/planner.rs
+++ b/datafusion/src/sql/planner.rs
@@ -50,9 +50,10 @@ use arrow::datatypes::*;
 use hashbrown::HashMap;
 use sqlparser::ast::{
     BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg,
-    HiveDistributionStyle, Ident, Join, JoinConstraint, JoinOperator, ObjectName, Query,
-    Select, SelectItem, SetExpr, SetOperator, ShowStatementFilter, TableFactor,
-    TableWithJoins, TrimWhereField, UnaryOperator, Value, Values as SQLValues,
+    FunctionArgExpr, HiveDistributionStyle, Ident, Join, JoinConstraint, JoinOperator,
+    ObjectName, Query, Select, SelectItem, SetExpr, SetOperator, ShowStatementFilter,
+    TableFactor, TableWithJoins, TrimWhereField, UnaryOperator, Value,
+    Values as SQLValues,
 };
 use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption};
 use sqlparser::ast::{ObjectType, OrderByExpr, Statement};
@@ -85,30 +86,40 @@ pub struct SqlToRel<'a, S: ContextProvider> {
     schema_provider: &'a S,
 }
 
-fn plan_key(key: Value) -> ScalarValue {
-    match key {
-        Value::Number(s, _) => ScalarValue::Int64(Some(s.parse().unwrap())),
-        Value::SingleQuotedString(s) => ScalarValue::Utf8(Some(s)),
-        _ => unreachable!(),
-    }
+fn plan_key(key: SQLExpr) -> Result<ScalarValue> {
+    let scalar = match key {
+        SQLExpr::Value(Value::Number(s, _)) => {
+            ScalarValue::Int64(Some(s.parse().unwrap()))
+        }
+        SQLExpr::Value(Value::SingleQuotedString(s)) => ScalarValue::Utf8(Some(s)),
+        _ => {
+            return Err(DataFusionError::SQL(ParserError(format!(
+                "Unsuported index key expression: {}",
+                key
+            ))))
+        }
+    };
+
+    Ok(scalar)
 }
 
-#[allow(clippy::branches_sharing_code)]
-fn plan_indexed(expr: Expr, mut keys: Vec<Value>) -> Expr {
-    if keys.len() == 1 {
-        let key = keys.pop().unwrap();
-        Expr::GetIndexedField {
-            expr: Box::new(expr),
-            key: plan_key(key),
-        }
+fn plan_indexed(expr: Expr, mut keys: Vec<SQLExpr>) -> Result<Expr> {
+    let key = keys.pop().ok_or_else(|| {
+        DataFusionError::SQL(ParserError(
+            "Internal error: Missing index key expression".to_string(),
+        ))
+    })?;
+
+    let expr = if !keys.is_empty() {
+        plan_indexed(expr, keys)?
     } else {
-        let key = keys.pop().unwrap();
-        let expr = Box::new(plan_indexed(expr, keys));
-        Expr::GetIndexedField {
-            expr,
-            key: plan_key(key),
-        }
-    }
+        expr
+    };
+
+    Ok(Expr::GetIndexedField {
+        expr: Box::new(expr),
+        key: plan_key(key)?,
+    })
 }
 
 impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -153,6 +164,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 external: false,
                 if_not_exists: false,
                 without_rowid: _without_row_id,
+                engine: _engine,
+                default_charset: _default_charset,
             } if columns.is_empty()
                 && constraints.is_empty()
                 && table_properties.is_empty()
@@ -1241,11 +1254,20 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         sql: &FunctionArg,
         schema: &DFSchema,
     ) -> Result<Expr> {
-        match sql {
-            FunctionArg::Named { name: _, arg } => {
-                self.sql_expr_to_logical_expr(arg, schema)
+        let arg: &FunctionArgExpr = match sql {
+            FunctionArg::Named { name: _, arg } => arg,
+            FunctionArg::Unnamed(arg) => arg,
+        };
+
+        match arg {
+            FunctionArgExpr::Expr(arg) => self.sql_expr_to_logical_expr(arg, schema),
+            FunctionArgExpr::Wildcard => Ok(Expr::Wildcard),
+            FunctionArgExpr::QualifiedWildcard(_) => {
+                Err(DataFusionError::NotImplemented(format!(
+                    "Unsupported qualified wildcard argument: {:?}",
+                    sql
+                )))
             }
-            FunctionArg::Unnamed(value) => self.sql_expr_to_logical_expr(value, schema),
         }
     }
 
@@ -1409,7 +1431,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
             SQLExpr::MapAccess { ref column, keys } => {
                 if let SQLExpr::Identifier(ref id) = column.as_ref() {
-                    Ok(plan_indexed(col(&id.value), keys.clone()))
+                    plan_indexed(col(&id.value), keys.clone())
                 } else {
                     Err(DataFusionError::NotImplemented(format!(
                         "map access requires an identifier, found column {} instead",
@@ -1440,8 +1462,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 }
             }
 
-            SQLExpr::Wildcard => Ok(Expr::Wildcard),
-
             SQLExpr::Case {
                 operand,
                 conditions,
@@ -1773,10 +1793,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 .args
                 .iter()
                 .map(|a| match a {
-                    FunctionArg::Unnamed(SQLExpr::Value(Value::Number(_, _))) => {
-                        Ok(lit(1_u8))
-                    }
-                    FunctionArg::Unnamed(SQLExpr::Wildcard) => Ok(lit(1_u8)),
+                    FunctionArg::Unnamed(FunctionArgExpr::Expr(SQLExpr::Value(
+                        Value::Number(_, _),
+                    ))) => Ok(lit(1_u8)),
+                    FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => Ok(lit(1_u8)),
                     _ => self.sql_fn_arg_to_logical_expr(a, schema),
                 })
                 .collect::<Result<Vec<Expr>>>()