You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/04/28 20:38:32 UTC
[arrow-datafusion] branch master updated: Introduce new `DataFusionError::SchemaError` type (#2371)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new af6be6e8c Introduce new `DataFusionError::SchemaError` type (#2371)
af6be6e8c is described below
commit af6be6e8c42ff4309b179113d66c55c395b7005c
Author: Andy Grove <an...@gmail.com>
AuthorDate: Thu Apr 28 14:38:27 2022 -0600
Introduce new `DataFusionError::SchemaError` type (#2371)
---
datafusion/common/src/dfschema.rs | 84 +++++++++++++++--------------
datafusion/common/src/error.rs | 72 ++++++++++++++++++++++++-
datafusion/common/src/lib.rs | 2 +-
datafusion/core/src/logical_plan/builder.rs | 29 +++++-----
datafusion/core/src/sql/planner.rs | 2 +-
5 files changed, 132 insertions(+), 57 deletions(-)
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index 33c4193e2..209c5e3ed 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -22,7 +22,7 @@ use std::collections::{HashMap, HashSet};
use std::convert::TryFrom;
use std::sync::Arc;
-use crate::error::{DataFusionError, Result};
+use crate::error::{DataFusionError, Result, SchemaError};
use crate::Column;
use arrow::compute::can_cast_types;
@@ -67,16 +67,19 @@ impl DFSchema {
for field in &fields {
if let Some(qualifier) = field.qualifier() {
if !qualified_names.insert((qualifier, field.name())) {
- return Err(DataFusionError::Plan(format!(
- "Schema contains duplicate qualified field name '{}'",
- field.qualified_name()
- )));
+ return Err(DataFusionError::SchemaError(
+ SchemaError::DuplicateQualifiedField {
+ qualifier: qualifier.to_string(),
+ name: field.name().to_string(),
+ },
+ ));
}
} else if !unqualified_names.insert(field.name()) {
- return Err(DataFusionError::Plan(format!(
- "Schema contains duplicate unqualified field name '{}'",
- field.name()
- )));
+ return Err(DataFusionError::SchemaError(
+ SchemaError::DuplicateUnqualifiedField {
+ name: field.name().to_string(),
+ },
+ ));
}
}
@@ -94,11 +97,12 @@ impl DFSchema {
});
for (qualifier, name) in &qualified_names {
if unqualified_names.contains(name) {
- return Err(DataFusionError::Plan(format!(
- "Schema contains qualified field name '{}.{}' \
- and unqualified field name '{}' which would be ambiguous",
- qualifier, name, name
- )));
+ return Err(DataFusionError::SchemaError(
+ SchemaError::AmbiguousReference {
+ qualifier: Some(qualifier.to_string()),
+ name: name.to_string(),
+ },
+ ));
}
}
Ok(Self { fields, metadata })
@@ -178,11 +182,11 @@ impl DFSchema {
}
}
- Err(DataFusionError::Plan(format!(
- "No field named '{}'. Valid fields are {}.",
- name,
- self.get_field_names()
- )))
+ Err(DataFusionError::SchemaError(SchemaError::FieldNotExist {
+ qualifier: None,
+ name: name.to_string(),
+ valid_fields: Some(self.get_field_names()),
+ }))
}
pub fn index_of_column_by_name(
@@ -206,12 +210,11 @@ impl DFSchema {
})
.map(|(idx, _)| idx);
match matches.next() {
- None => Err(DataFusionError::Plan(format!(
- "No field named '{}.{}'. Valid fields are {}.",
- qualifier.unwrap_or("<unqualified>"),
- name,
- self.get_field_names()
- ))),
+ None => Err(DataFusionError::SchemaError(SchemaError::FieldNotExist {
+ qualifier: qualifier.map(|s| s.to_string()),
+ name: name.to_string(),
+ valid_fields: Some(self.get_field_names()),
+ })),
Some(idx) => match matches.next() {
None => Ok(idx),
// found more than one matches
@@ -262,16 +265,18 @@ impl DFSchema {
pub fn field_with_unqualified_name(&self, name: &str) -> Result<&DFField> {
let matches = self.fields_with_unqualified_name(name);
match matches.len() {
- 0 => Err(DataFusionError::Plan(format!(
- "No field with unqualified name '{}'. Valid fields are {}.",
- name,
- self.get_field_names()
- ))),
+ 0 => Err(DataFusionError::SchemaError(SchemaError::FieldNotExist {
+ qualifier: None,
+ name: name.to_string(),
+ valid_fields: Some(self.get_field_names()),
+ })),
1 => Ok(matches[0]),
- _ => Err(DataFusionError::Plan(format!(
- "Ambiguous reference to field named '{}'",
- name
- ))),
+ _ => Err(DataFusionError::SchemaError(
+ SchemaError::AmbiguousReference {
+ qualifier: None,
+ name: name.to_string(),
+ },
+ )),
}
}
@@ -314,7 +319,7 @@ impl DFSchema {
.try_for_each(|(l_field, r_field)| {
if !can_cast_types(r_field.data_type(), l_field.data_type()) {
Err(DataFusionError::Plan(
- format!("Column {} (type: {}) is not compatible wiht column {} (type: {})",
+ format!("Column {} (type: {}) is not compatible with column {} (type: {})",
r_field.name(),
r_field.data_type(),
l_field.name(),
@@ -350,7 +355,7 @@ impl DFSchema {
}
/// Get comma-separated list of field names for use in error messages
- fn get_field_names(&self) -> String {
+ fn get_field_names(&self) -> Vec<String> {
self.fields
.iter()
.map(|f| match f.qualifier() {
@@ -358,7 +363,6 @@ impl DFSchema {
None => format!("'{}'", f.name()),
})
.collect::<Vec<_>>()
- .join(", ")
}
/// Get metadata of this schema
@@ -679,7 +683,7 @@ mod tests {
let join = left.join(&right);
assert!(join.is_err());
assert_eq!(
- "Error during planning: Schema contains duplicate \
+ "Schema error: Schema contains duplicate \
qualified field name \'t1.c0\'",
&format!("{}", join.err().unwrap())
);
@@ -693,7 +697,7 @@ mod tests {
let join = left.join(&right);
assert!(join.is_err());
assert_eq!(
- "Error during planning: Schema contains duplicate \
+ "Schema error: Schema contains duplicate \
unqualified field name \'c0\'",
&format!("{}", join.err().unwrap())
);
@@ -728,7 +732,7 @@ mod tests {
let join = left.join(&right);
assert!(join.is_err());
assert_eq!(
- "Error during planning: Schema contains qualified \
+ "Schema error: Schema contains qualified \
field name \'t1.c0\' and unqualified field name \'c0\' which would be ambiguous",
&format!("{}", join.err().unwrap())
);
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index 4a82ac3e9..5e784f4ac 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -62,8 +62,11 @@ pub enum DataFusionError {
// This error is raised when one of those invariants is not verified during execution.
Internal(String),
/// This error happens whenever a plan is not valid. Examples include
- /// impossible casts, schema inference not possible and non-unique column names.
+ /// impossible casts.
Plan(String),
+ /// This error happens with schema-related errors, such as schema inference not possible
+ /// and non-unique column names.
+ SchemaError(SchemaError),
/// Error returned during execution of the query.
/// Examples include files not found, errors in parsing certain types.
Execution(String),
@@ -78,6 +81,70 @@ pub enum DataFusionError {
JITError(ModuleError),
}
+/// Schema-related errors
+#[derive(Debug)]
+pub enum SchemaError {
+ /// Schema contains a (possibly) qualified and unqualified field with same unqualified name
+ AmbiguousReference {
+ qualifier: Option<String>,
+ name: String,
+ },
+ /// Schema contains duplicate qualified field name
+ DuplicateQualifiedField { qualifier: String, name: String },
+ /// Schema contains duplicate unqualified field name
+ DuplicateUnqualifiedField { name: String },
+ /// No field with this name
+ FieldNotExist {
+ qualifier: Option<String>,
+ name: String,
+ valid_fields: Option<Vec<String>>,
+ },
+}
+
+impl Display for SchemaError {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::FieldNotExist {
+ qualifier,
+ name,
+ valid_fields,
+ } => {
+ write!(f, "No field named ")?;
+ if let Some(q) = qualifier {
+ write!(f, "'{}.{}'", q, name)?;
+ } else {
+ write!(f, "'{}'", name)?;
+ }
+ if let Some(field_names) = valid_fields {
+ write!(f, ". Valid fields are {}", field_names.join(", "))?;
+ }
+ write!(f, ".")
+ }
+ Self::DuplicateQualifiedField { qualifier, name } => {
+ write!(
+ f,
+ "Schema contains duplicate qualified field name '{}.{}'",
+ qualifier, name
+ )
+ }
+ Self::DuplicateUnqualifiedField { name } => {
+ write!(
+ f,
+ "Schema contains duplicate unqualified field name '{}'",
+ name
+ )
+ }
+ Self::AmbiguousReference { qualifier, name } => {
+ if let Some(q) = qualifier {
+ write!(f, "Schema contains qualified field name '{}.{}' and unqualified field name '{}' which would be ambiguous", q, name, name)
+ } else {
+ write!(f, "Ambiguous reference to unqualified field '{}'", name)
+ }
+ }
+ }
+ }
+}
+
impl From<io::Error> for DataFusionError {
fn from(e: io::Error) -> Self {
DataFusionError::IoError(e)
@@ -159,6 +226,9 @@ impl Display for DataFusionError {
DataFusionError::Plan(ref desc) => {
write!(f, "Error during planning: {}", desc)
}
+ DataFusionError::SchemaError(ref desc) => {
+ write!(f, "Schema error: {}", desc)
+ }
DataFusionError::Execution(ref desc) => {
write!(f, "Execution error: {}", desc)
}
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index d39020f72..824b0ca6c 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -24,5 +24,5 @@ mod scalar;
pub use column::Column;
pub use dfschema::{DFField, DFSchema, DFSchemaRef, ExprSchema, ToDFSchema};
-pub use error::{DataFusionError, Result};
+pub use error::{DataFusionError, Result, SchemaError};
pub use scalar::{ScalarType, ScalarValue};
diff --git a/datafusion/core/src/logical_plan/builder.rs b/datafusion/core/src/logical_plan/builder.rs
index 0b0fdebb6..4f00bec60 100644
--- a/datafusion/core/src/logical_plan/builder.rs
+++ b/datafusion/core/src/logical_plan/builder.rs
@@ -1202,6 +1202,7 @@ pub(crate) fn expand_qualified_wildcard(
#[cfg(test)]
mod tests {
use arrow::datatypes::{DataType, Field};
+ use datafusion_common::SchemaError;
use datafusion_expr::expr_fn::exists;
use crate::logical_plan::StringifiedPlan;
@@ -1428,16 +1429,16 @@ mod tests {
.project(vec![col("id"), col("first_name").alias("id")]);
match plan {
- Err(DataFusionError::Plan(e)) => {
- assert_eq!(
- e,
- "Schema contains qualified field name 'employee_csv.id' \
- and unqualified field name 'id' which would be ambiguous"
- );
+ Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
+ qualifier,
+ name,
+ })) => {
+ assert_eq!("employee_csv", qualifier.unwrap().as_str());
+ assert_eq!("id", &name);
Ok(())
}
_ => Err(DataFusionError::Plan(
- "Plan should have returned an DataFusionError::Plan".to_string(),
+ "Plan should have returned an DataFusionError::SchemaError".to_string(),
)),
}
}
@@ -1454,16 +1455,16 @@ mod tests {
.aggregate(vec![col("state")], vec![sum(col("salary")).alias("state")]);
match plan {
- Err(DataFusionError::Plan(e)) => {
- assert_eq!(
- e,
- "Schema contains qualified field name 'employee_csv.state' and \
- unqualified field name 'state' which would be ambiguous"
- );
+ Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference {
+ qualifier,
+ name,
+ })) => {
+ assert_eq!("employee_csv", qualifier.unwrap().as_str());
+ assert_eq!("state", &name);
Ok(())
}
_ => Err(DataFusionError::Plan(
- "Plan should have returned an DataFusionError::Plan".to_string(),
+ "Plan should have returned an DataFusionError::SchemaError".to_string(),
)),
}
}
diff --git a/datafusion/core/src/sql/planner.rs b/datafusion/core/src/sql/planner.rs
index 253d3f454..b34b78d5f 100644
--- a/datafusion/core/src/sql/planner.rs
+++ b/datafusion/core/src/sql/planner.rs
@@ -3764,7 +3764,7 @@ mod tests {
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Plan(\"Column Int64(1) (type: Int64) is \
- not compatible wiht column IntervalMonthDayNano\
+ not compatible with column IntervalMonthDayNano\
(\\\"950737950189618795196236955648\\\") \
(type: Interval(MonthDayNano))\")",
format!("{:?}", err)