You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/11/02 17:25:59 UTC

[arrow-datafusion] branch master updated: Improve FieldNotFound errors (#4084)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 8cd6129cf Improve FieldNotFound errors (#4084)
8cd6129cf is described below

commit 8cd6129cf244a058cc5767c34da3dcd0a393e62e
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Nov 2 11:25:53 2022 -0600

    Improve FieldNotFound errors (#4084)
---
 datafusion/common/src/column.rs         | 13 +++++++---
 datafusion/common/src/dfschema.rs       | 18 +++++++++++---
 datafusion/common/src/error.rs          | 43 ++++++++++++++++++++-------------
 datafusion/core/tests/sql/references.rs |  2 +-
 datafusion/expr/src/expr_rewriter.rs    |  2 +-
 datafusion/sql/src/planner.rs           |  4 +--
 6 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index 2c196b10b..d8adaacf2 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -34,6 +34,14 @@ pub struct Column {
 }
 
 impl Column {
+    /// Create Column from optional qualifier and name
+    pub fn new(relation: Option<impl Into<String>>, name: impl Into<String>) -> Self {
+        Self {
+            relation: relation.map(|r| r.into()),
+            name: name.into(),
+        }
+    }
+
     /// Create Column from unqualified name.
     pub fn from_name(name: impl Into<String>) -> Self {
         Self {
@@ -120,12 +128,11 @@ impl Column {
         }
 
         Err(DataFusionError::SchemaError(SchemaError::FieldNotFound {
-            qualifier: self.relation.clone(),
-            name: self.name,
+            field: Column::new(self.relation.clone(), self.name),
             valid_fields: Some(
                 schemas
                     .iter()
-                    .flat_map(|s| s.fields().iter().map(|f| f.qualified_name()))
+                    .flat_map(|s| s.fields().iter().map(|f| f.qualified_column()))
                     .collect(),
             ),
         }))
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index e391c5fc1..59dd8791b 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -596,6 +596,18 @@ mod tests {
     use arrow::datatypes::DataType;
     use std::collections::BTreeMap;
 
+    #[test]
+    fn qualifier_in_name() -> Result<()> {
+        let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
+        // lookup with unqualified name "t1.c0"
+        let err = schema.index_of_column_by_name(None, "t1.c0").err().unwrap();
+        assert_eq!(
+            "Schema error: No field named 't1.c0'. Valid fields are 't1'.'c0', 't1'.'c1'.",
+            &format!("{}", err)
+        );
+        Ok(())
+    }
+
     #[test]
     fn from_unqualified_field() {
         let field = Field::new("c0", DataType::Boolean, true);
@@ -663,7 +675,7 @@ mod tests {
         assert!(join.is_err());
         assert_eq!(
             "Schema error: Schema contains duplicate \
-        qualified field name \'t1.c0\'",
+        qualified field name \'t1\'.\'c0\'",
             &format!("{}", join.err().unwrap())
         );
         Ok(())
@@ -712,7 +724,7 @@ mod tests {
         assert!(join.is_err());
         assert_eq!(
             "Schema error: Schema contains qualified \
-        field name \'t1.c0\' and unqualified field name \'c0\' which would be ambiguous",
+        field name \'t1\'.\'c0\' and unqualified field name \'c0\' which would be ambiguous",
             &format!("{}", join.err().unwrap())
         );
         Ok(())
@@ -722,7 +734,7 @@ mod tests {
     #[test]
     fn helpful_error_messages() -> Result<()> {
         let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
-        let expected_help = "Valid fields are \'t1.c0\', \'t1.c1\'.";
+        let expected_help = "Valid fields are \'t1\'.\'c0\', \'t1\'.\'c1\'.";
         // Pertinent message parts
         let expected_err_msg = "Fully qualified field name \'t1.c0\'";
         assert!(schema
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index c2d3e389c..afd03976e 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -22,7 +22,7 @@ use std::fmt::{Display, Formatter};
 use std::io;
 use std::result;
 
-use crate::DFSchema;
+use crate::{Column, DFSchema};
 #[cfg(feature = "avro")]
 use apache_avro::Error as AvroError;
 use arrow::error::ArrowError;
@@ -123,9 +123,8 @@ pub enum SchemaError {
     DuplicateUnqualifiedField { name: String },
     /// No field with this name
     FieldNotFound {
-        qualifier: Option<String>,
-        name: String,
-        valid_fields: Option<Vec<String>>,
+        field: Column,
+        valid_fields: Option<Vec<Column>>,
     },
 }
 
@@ -136,9 +135,14 @@ pub fn field_not_found(
     schema: &DFSchema,
 ) -> DataFusionError {
     DataFusionError::SchemaError(SchemaError::FieldNotFound {
-        qualifier,
-        name: name.to_string(),
-        valid_fields: Some(schema.field_names()),
+        field: Column::new(qualifier, name),
+        valid_fields: Some(
+            schema
+                .fields()
+                .iter()
+                .map(|f| f.qualified_column())
+                .collect(),
+        ),
     })
 }
 
@@ -146,23 +150,28 @@ impl Display for SchemaError {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
             Self::FieldNotFound {
-                qualifier,
-                name,
+                field,
                 valid_fields,
             } => {
                 write!(f, "No field named ")?;
-                if let Some(q) = qualifier {
-                    write!(f, "'{}.{}'", q, name)?;
+                if let Some(q) = &field.relation {
+                    write!(f, "'{}'.'{}'", q, field.name)?;
                 } else {
-                    write!(f, "'{}'", name)?;
+                    write!(f, "'{}'", field.name)?;
                 }
-                if let Some(field_names) = valid_fields {
+                if let Some(fields) = valid_fields {
                     write!(
                         f,
                         ". Valid fields are {}",
-                        field_names
+                        fields
                             .iter()
-                            .map(|name| format!("'{}'", name))
+                            .map(|field| {
+                                if let Some(q) = &field.relation {
+                                    format!("'{}'.'{}'", q, field.name)
+                                } else {
+                                    format!("'{}'", field.name)
+                                }
+                            })
                             .collect::<Vec<String>>()
                             .join(", ")
                     )?;
@@ -172,7 +181,7 @@ impl Display for SchemaError {
             Self::DuplicateQualifiedField { qualifier, name } => {
                 write!(
                     f,
-                    "Schema contains duplicate qualified field name '{}.{}'",
+                    "Schema contains duplicate qualified field name '{}'.'{}'",
                     qualifier, name
                 )
             }
@@ -185,7 +194,7 @@ impl Display for SchemaError {
             }
             Self::AmbiguousReference { qualifier, name } => {
                 if let Some(q) = qualifier {
-                    write!(f, "Schema contains qualified field name '{}.{}' and unqualified field name '{}' which would be ambiguous", q, name, name)
+                    write!(f, "Schema contains qualified field name '{}'.'{}' and unqualified field name '{}' which would be ambiguous", q, name, name)
                 } else {
                     write!(f, "Ambiguous reference to unqualified field '{}'", name)
                 }
diff --git a/datafusion/core/tests/sql/references.rs b/datafusion/core/tests/sql/references.rs
index 77afdb66c..e63acc444 100644
--- a/datafusion/core/tests/sql/references.rs
+++ b/datafusion/core/tests/sql/references.rs
@@ -67,7 +67,7 @@ async fn qualified_table_references_and_fields() -> Result<()> {
     let error = ctx.create_logical_plan(sql).unwrap_err();
     assert_contains!(
         error.to_string(),
-        "No field named 'f1.c1'. Valid fields are 'test.f.c1', 'test.test.c2'"
+        "No field named 'f1'.'c1'. Valid fields are 'test'.'f.c1', 'test'.'test.c2'"
     );
 
     // however, enclosing it in double quotes is ok
diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs
index 8ac645617..40516c537 100644
--- a/datafusion/expr/src/expr_rewriter.rs
+++ b/datafusion/expr/src/expr_rewriter.rs
@@ -673,7 +673,7 @@ mod test {
             .to_string();
         assert_eq!(
             error,
-            "Schema error: No field named 'b'. Valid fields are 'tableA.a'."
+            "Schema error: No field named 'b'. Valid fields are 'tableA'.'a'."
         );
     }
 
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 68b84cc9f..21761a0bc 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -3585,8 +3585,8 @@ mod tests {
         let sql = "SELECT SUM(age) FROM person GROUP BY doesnotexist";
         let err = logical_plan(sql).expect_err("query should have failed");
         assert_eq!("Schema error: No field named 'doesnotexist'. Valid fields are 'SUM(person.age)', \
-        'person.id', 'person.first_name', 'person.last_name', 'person.age', 'person.state', \
-        'person.salary', 'person.birth_date', 'person.😀'.", format!("{}", err));
+        'person'.'id', 'person'.'first_name', 'person'.'last_name', 'person'.'age', 'person'.'state', \
+        'person'.'salary', 'person'.'birth_date', 'person'.'😀'.", format!("{}", err));
     }
 
     #[test]