You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/07 13:48:21 UTC
[arrow-datafusion] branch master updated: fix: Case insensitive unquoted identifiers (#1747)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new d014ff2 fix: Case insensitive unquoted identifiers (#1747)
d014ff2 is described below
commit d014ff218012fb019e9f625416d4cf35051db107
Author: Marko Mikulicic <mm...@gmail.com>
AuthorDate: Mon Feb 7 14:48:17 2022 +0100
fix: Case insensitive unquoted identifiers (#1747)
---
datafusion/src/execution/context.rs | 167 ++++++++++++++++++++++++++++++++++++
datafusion/src/sql/planner.rs | 18 ++--
datafusion/src/sql/utils.rs | 9 ++
3 files changed, 182 insertions(+), 12 deletions(-)
diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs
index 96e49c8..2f8663e 100644
--- a/datafusion/src/execution/context.rs
+++ b/datafusion/src/execution/context.rs
@@ -3359,6 +3359,173 @@ mod tests {
assert_eq!(result[0].schema().metadata(), result[1].schema().metadata());
}
+ #[tokio::test]
+ async fn normalized_column_identifiers() {
+ // create local execution context
+ let mut ctx = ExecutionContext::new();
+
+ // register csv file with the execution context
+ ctx.register_csv(
+ "case_insensitive_test",
+ "tests/example.csv",
+ CsvReadOptions::new(),
+ )
+ .await
+ .unwrap();
+
+ let sql = "SELECT A, b FROM case_insensitive_test";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| a | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ let sql = "SELECT t.A, b FROM case_insensitive_test AS t";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| a | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ // Aliases
+
+ let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| x | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| x | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#;
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| X | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ // Order by
+
+ let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| x | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| x | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#;
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| X | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ // Where
+
+ let sql = "SELECT a, b FROM case_insensitive_test where A IS NOT null";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| a | b |",
+ "+---+---+",
+ "| 1 | 2 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ // Group by
+
+ let sql = "SELECT a as x, count(*) as c FROM case_insensitive_test GROUP BY X";
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| x | c |",
+ "+---+---+",
+ "| 1 | 1 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+
+ let sql =
+ r#"SELECT a as "X", count(*) as c FROM case_insensitive_test GROUP BY "X""#;
+ let result = plan_and_collect(&mut ctx, sql)
+ .await
+ .expect("ran plan correctly");
+ let expected = vec![
+ "+---+---+",
+ "| X | c |",
+ "+---+---+",
+ "| 1 | 1 |",
+ "+---+---+",
+ ];
+ assert_batches_sorted_eq!(expected, &result);
+ }
+
struct MyPhysicalPlanner {}
#[async_trait]
diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs
index 4629772..682b92b 100644
--- a/datafusion/src/sql/planner.rs
+++ b/datafusion/src/sql/planner.rs
@@ -36,7 +36,7 @@ use crate::logical_plan::{
use crate::optimizer::utils::exprlist_to_columns;
use crate::prelude::JoinType;
use crate::scalar::ScalarValue;
-use crate::sql::utils::make_decimal_type;
+use crate::sql::utils::{make_decimal_type, normalize_ident};
use crate::{
error::{DataFusionError, Result},
physical_plan::udaf::AggregateUDF,
@@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
Box::new(self.sql_to_rex(expr, schema)?),
- alias.value.clone(),
+ normalize_ident(alias),
)),
SelectItem::Wildcard => Ok(Expr::Wildcard),
SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
@@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLExpr::Identifier(ref id) => {
if id.value.starts_with('@') {
+ // TODO: figure out if ScalarVariables should be insensitive.
let var_names = vec![id.value.clone()];
Ok(Expr::ScalarVariable(var_names))
} else {
@@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// identifier. (e.g. it is "foo.bar" not foo.bar)
Ok(Expr::Column(Column {
relation: None,
- name: id.value.clone(),
+ name: normalize_ident(id),
}))
}
}
@@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
SQLExpr::CompoundIdentifier(ids) => {
- let mut var_names: Vec<_> =
- ids.iter().map(|id| id.value.clone()).collect();
+ let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect();
if &var_names[0][0..1] == "@" {
Ok(Expr::ScalarVariable(var_names))
@@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// (e.g. "foo.bar") for function names yet
function.name.to_string()
} else {
- // if there is a quote style, then don't normalize
- // the name, otherwise normalize to lowercase
- let ident = &function.name.0[0];
- match ident.quote_style {
- Some(_) => ident.value.clone(),
- None => ident.value.to_ascii_lowercase(),
- }
+ normalize_ident(&function.name.0[0])
};
// first, scalar built-in
diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs
index 0ede5ad..d0cef0f 100644
--- a/datafusion/src/sql/utils.rs
+++ b/datafusion/src/sql/utils.rs
@@ -18,6 +18,7 @@
//! SQL Utility Functions
use arrow::datatypes::DataType;
+use sqlparser::ast::Ident;
use crate::logical_plan::{Expr, LogicalPlan};
use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128};
@@ -532,6 +533,14 @@ pub(crate) fn make_decimal_type(
}
}
+// Normalize an identifer to a lowercase string unless the identifier is quoted.
+pub(crate) fn normalize_ident(id: &Ident) -> String {
+ match id.quote_style {
+ Some(_) => id.value.clone(),
+ None => id.value.to_ascii_lowercase(),
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;