You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/21 20:33:18 UTC

[arrow-datafusion] branch main updated: Fix up some DataFusionError::Internal errors with correct type (#6721)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 390841172a Fix up some DataFusionError::Internal errors with correct type (#6721)
390841172a is described below

commit 390841172ad54398957e6cd27b53ecfc93dd498f
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Wed Jun 21 16:33:13 2023 -0400

    Fix up some DataFusionError::Internal errors with correct type (#6721)
---
 datafusion/common/src/error.rs                     |  6 ++++--
 datafusion/execution/src/task.rs                   |  4 ++--
 datafusion/expr/src/logical_plan/plan.rs           |  4 ++--
 datafusion/physical-expr/src/aggregate/utils.rs    |  8 ++++----
 datafusion/physical-expr/src/array_expressions.rs  |  2 +-
 datafusion/physical-expr/src/struct_expressions.rs |  2 +-
 .../physical-expr/src/unicode_expressions.rs       | 10 +++++-----
 datafusion/sql/src/utils.rs                        |  4 ++--
 datafusion/sql/tests/sql_integration.rs            | 22 +++++++++++++++-------
 datafusion/substrait/src/logical_plan/consumer.rs  | 16 ++++++++--------
 10 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index 2074d35fb2..656cd76ce5 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -65,8 +65,10 @@ pub enum DataFusionError {
     NotImplemented(String),
     /// Error returned as a consequence of an error in DataFusion.
     /// This error should not happen in normal usage of DataFusion.
-    // DataFusions has internal invariants that we are unable to ask the compiler to check for us.
-    // This error is raised when one of those invariants is not verified during execution.
+    ///
+    /// DataFusions has internal invariants that the compiler is not
+    /// always able to check.  This error is raised when one of those
+    /// invariants is not verified during execution.
     Internal(String),
     /// This error happens whenever a plan is not valid. Examples include
     /// impossible casts.
diff --git a/datafusion/execution/src/task.rs b/datafusion/execution/src/task.rs
index ca1bc9369e..b5eb4c85fa 100644
--- a/datafusion/execution/src/task.rs
+++ b/datafusion/execution/src/task.rs
@@ -138,7 +138,7 @@ impl FunctionRegistry for TaskContext {
         let result = self.scalar_functions.get(name);
 
         result.cloned().ok_or_else(|| {
-            DataFusionError::Internal(format!(
+            DataFusionError::Plan(format!(
                 "There is no UDF named \"{name}\" in the TaskContext"
             ))
         })
@@ -148,7 +148,7 @@ impl FunctionRegistry for TaskContext {
         let result = self.aggregate_functions.get(name);
 
         result.cloned().ok_or_else(|| {
-            DataFusionError::Internal(format!(
+            DataFusionError::Plan(format!(
                 "There is no UDAF named \"{name}\" in the TaskContext"
             ))
         })
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index fad52fc3a3..ab45047acf 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -495,7 +495,7 @@ impl LogicalPlan {
             LogicalPlan::Prepare(prepare_lp) => {
                 // Verify if the number of params matches the number of values
                 if prepare_lp.data_types.len() != param_values.len() {
-                    return Err(DataFusionError::Internal(format!(
+                    return Err(DataFusionError::Plan(format!(
                         "Expected {} parameters, got {}",
                         prepare_lp.data_types.len(),
                         param_values.len()
@@ -506,7 +506,7 @@ impl LogicalPlan {
                 let iter = prepare_lp.data_types.iter().zip(param_values.iter());
                 for (i, (param_type, value)) in iter.enumerate() {
                     if *param_type != value.get_datatype() {
-                        return Err(DataFusionError::Internal(format!(
+                        return Err(DataFusionError::Plan(format!(
                             "Expected parameter of type {:?}, got {:?} at index {}",
                             param_type,
                             value.get_datatype(),
diff --git a/datafusion/physical-expr/src/aggregate/utils.rs b/datafusion/physical-expr/src/aggregate/utils.rs
index 158ceb316e..aa0834a89c 100644
--- a/datafusion/physical-expr/src/aggregate/utils.rs
+++ b/datafusion/physical-expr/src/aggregate/utils.rs
@@ -60,25 +60,25 @@ pub fn calculate_result_decimal_for_avg(
                     if new_value >= target_min && new_value <= target_max {
                         Ok(ScalarValue::Decimal128(Some(new_value), *p, *s))
                     } else {
-                        Err(DataFusionError::Internal(
+                        Err(DataFusionError::Execution(
                             "Arithmetic Overflow in AvgAccumulator".to_string(),
                         ))
                     }
                 } else {
                     // can't convert the lit decimal to the returned data type
-                    Err(DataFusionError::Internal(
+                    Err(DataFusionError::Execution(
                         "Arithmetic Overflow in AvgAccumulator".to_string(),
                     ))
                 }
             } else {
                 // can't convert the lit decimal to the returned data type
-                Err(DataFusionError::Internal(
+                Err(DataFusionError::Execution(
                     "Arithmetic Overflow in AvgAccumulator".to_string(),
                 ))
             }
         }
         other => Err(DataFusionError::Internal(format!(
-            "Error returned data type in AvgAccumulator {other:?}"
+            "Invalid target type in AvgAccumulator {other:?}"
         ))),
     }
 }
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 298bb66dd9..a3b4031356 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -81,7 +81,7 @@ macro_rules! array {
 fn array_array(args: &[ArrayRef]) -> Result<ArrayRef> {
     // do not accept 0 arguments.
     if args.is_empty() {
-        return Err(DataFusionError::Internal(
+        return Err(DataFusionError::Plan(
             "Array requires at least one argument".to_string(),
         ));
     }
diff --git a/datafusion/physical-expr/src/struct_expressions.rs b/datafusion/physical-expr/src/struct_expressions.rs
index dc8812b1ee..d4cfa309ff 100644
--- a/datafusion/physical-expr/src/struct_expressions.rs
+++ b/datafusion/physical-expr/src/struct_expressions.rs
@@ -26,7 +26,7 @@ use std::sync::Arc;
 fn array_struct(args: &[ArrayRef]) -> Result<ArrayRef> {
     // do not accept 0 arguments.
     if args.is_empty() {
-        return Err(DataFusionError::Internal(
+        return Err(DataFusionError::Execution(
             "struct requires at least one argument".to_string(),
         ));
     }
diff --git a/datafusion/physical-expr/src/unicode_expressions.rs b/datafusion/physical-expr/src/unicode_expressions.rs
index 6654904cf1..9f09402494 100644
--- a/datafusion/physical-expr/src/unicode_expressions.rs
+++ b/datafusion/physical-expr/src/unicode_expressions.rs
@@ -102,7 +102,7 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
                 .map(|(string, length)| match (string, length) {
                     (Some(string), Some(length)) => {
                         if length > i32::MAX as i64 {
-                            return Err(DataFusionError::Internal(format!(
+                            return Err(DataFusionError::Execution(format!(
                                 "lpad requested length {length} too large"
                             )));
                         }
@@ -139,7 +139,7 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
                 .map(|((string, length), fill)| match (string, length, fill) {
                     (Some(string), Some(length), Some(fill)) => {
                         if length > i32::MAX as i64 {
-                            return Err(DataFusionError::Internal(format!(
+                            return Err(DataFusionError::Execution(format!(
                                 "lpad requested length {length} too large"
                             )));
                         }
@@ -178,7 +178,7 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 
             Ok(Arc::new(result) as ArrayRef)
         }
-        other => Err(DataFusionError::Internal(format!(
+        other => Err(DataFusionError::Execution(format!(
             "lpad was called with {other} arguments. It requires at least 2 and at most 3."
         ))),
     }
@@ -245,7 +245,7 @@ pub fn rpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
                 .map(|(string, length)| match (string, length) {
                     (Some(string), Some(length)) => {
                         if length > i32::MAX as i64 {
-                            return Err(DataFusionError::Internal(format!(
+                            return Err(DataFusionError::Execution(format!(
                                 "rpad requested length {length} too large"
                             )));
                         }
@@ -281,7 +281,7 @@ pub fn rpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
                 .map(|((string, length), fill)| match (string, length, fill) {
                     (Some(string), Some(length), Some(fill)) => {
                         if length > i32::MAX as i64 {
-                            return Err(DataFusionError::Internal(format!(
+                            return Err(DataFusionError::Execution(format!(
                                 "rpad requested length {length} too large"
                             )));
                         }
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 200963d321..e37830d0ba 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -523,7 +523,7 @@ pub(crate) fn make_decimal_type(
         (Some(p), Some(s)) => (p as u8, s as i8),
         (Some(p), None) => (p as u8, 0),
         (None, Some(_)) => {
-            return Err(DataFusionError::Internal(
+            return Err(DataFusionError::Plan(
                 "Cannot specify only scale for decimal data type".to_string(),
             ))
         }
@@ -535,7 +535,7 @@ pub(crate) fn make_decimal_type(
         || precision > DECIMAL128_MAX_PRECISION
         || scale.unsigned_abs() > precision
     {
-        Err(DataFusionError::Internal(format!(
+        Err(DataFusionError::Plan(format!(
             "Decimal(precision = {precision}, scale = {scale}) should satisfy `0 < precision <= 38`, and `scale <= precision`."
         )))
     } else {
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index f71dcf0195..83dd071109 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -197,31 +197,39 @@ fn try_cast_from_aggregation() {
 }
 
 #[test]
-fn cast_to_invalid_decimal_type() {
+fn cast_to_invalid_decimal_type_precision_0() {
     // precision == 0
     {
         let sql = "SELECT CAST(10 AS DECIMAL(0))";
         let err = logical_plan(sql).expect_err("query should have failed");
         assert_eq!(
-            r##"Internal("Decimal(precision = 0, scale = 0) should satisfy `0 < precision <= 38`, and `scale <= precision`.")"##,
+            r##"Plan("Decimal(precision = 0, scale = 0) should satisfy `0 < precision <= 38`, and `scale <= precision`.")"##,
             format!("{err:?}")
         );
     }
+}
+
+#[test]
+fn cast_to_invalid_decimal_type_precision_gt_38() {
     // precision > 38
     {
         let sql = "SELECT CAST(10 AS DECIMAL(39))";
         let err = logical_plan(sql).expect_err("query should have failed");
         assert_eq!(
-            r##"Internal("Decimal(precision = 39, scale = 0) should satisfy `0 < precision <= 38`, and `scale <= precision`.")"##,
+            r##"Plan("Decimal(precision = 39, scale = 0) should satisfy `0 < precision <= 38`, and `scale <= precision`.")"##,
             format!("{err:?}")
         );
     }
+}
+
+#[test]
+fn cast_to_invalid_decimal_type_precision_lt_scale() {
     // precision < scale
     {
         let sql = "SELECT CAST(10 AS DECIMAL(5, 10))";
         let err = logical_plan(sql).expect_err("query should have failed");
         assert_eq!(
-            r##"Internal("Decimal(precision = 5, scale = 10) should satisfy `0 < precision <= 38`, and `scale <= precision`.")"##,
+            r##"Plan("Decimal(precision = 5, scale = 10) should satisfy `0 < precision <= 38`, and `scale <= precision`.")"##,
             format!("{err:?}")
         );
     }
@@ -3613,7 +3621,7 @@ fn test_prepare_statement_to_plan_no_param() {
 }
 
 #[test]
-#[should_panic(expected = "value: Internal(\"Expected 1 parameters, got 0\")")]
+#[should_panic(expected = "value: Plan(\"Expected 1 parameters, got 0\")")]
 fn test_prepare_statement_to_plan_one_param_no_value_panic() {
     // no embedded parameter but still declare it
     let sql = "PREPARE my_plan(INT) AS SELECT id, age  FROM person WHERE age = 10";
@@ -3626,7 +3634,7 @@ fn test_prepare_statement_to_plan_one_param_no_value_panic() {
 
 #[test]
 #[should_panic(
-    expected = "value: Internal(\"Expected parameter of type Int32, got Float64 at index 0\")"
+    expected = "value: Plan(\"Expected parameter of type Int32, got Float64 at index 0\")"
 )]
 fn test_prepare_statement_to_plan_one_param_one_value_different_type_panic() {
     // no embedded parameter but still declare it
@@ -3639,7 +3647,7 @@ fn test_prepare_statement_to_plan_one_param_one_value_different_type_panic() {
 }
 
 #[test]
-#[should_panic(expected = "value: Internal(\"Expected 0 parameters, got 1\")")]
+#[should_panic(expected = "value: Plan(\"Expected 0 parameters, got 1\")")]
 fn test_prepare_statement_to_plan_no_param_on_value_panic() {
     // no embedded parameter but still declare it
     let sql = "PREPARE my_plan AS SELECT id, age  FROM person WHERE age = 10";
diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs
index 4af0c4c6c9..ffa935be1f 100644
--- a/datafusion/substrait/src/logical_plan/consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer.rs
@@ -167,7 +167,7 @@ pub async fn from_substrait_plan(
                         Ok(from_substrait_rel(ctx, root.input.as_ref().unwrap(), &function_extension).await?)
                     }
                 },
-                None => Err(DataFusionError::Internal("Cannot parse plan relation: None".to_string()))
+                None => Err(DataFusionError::Plan("Cannot parse plan relation: None".to_string()))
             }
         },
         _ => Err(DataFusionError::NotImplemented(format!(
@@ -376,16 +376,16 @@ pub async fn from_substrait_rel(
                                 Operator::IsNotDistinctFrom => {
                                     Ok((l.clone(), r.clone(), true))
                                 }
-                                _ => Err(DataFusionError::Internal(
+                                _ => Err(DataFusionError::Plan(
                                     "invalid join condition op".to_string(),
                                 )),
                             },
-                            _ => Err(DataFusionError::Internal(
+                            _ => Err(DataFusionError::Plan(
                                 "invalid join condition expression".to_string(),
                             )),
                         }
                     }
-                    _ => Err(DataFusionError::Internal(
+                    _ => Err(DataFusionError::Plan(
                         "Non-binary expression is not supported in join condition"
                             .to_string(),
                     )),
@@ -406,7 +406,7 @@ pub async fn from_substrait_rel(
             Some(ReadType::NamedTable(nt)) => {
                 let table_reference = match nt.names.len() {
                     0 => {
-                        return Err(DataFusionError::Internal(
+                        return Err(DataFusionError::Plan(
                             "No table name found in NamedTable".to_string(),
                         ));
                     }
@@ -448,7 +448,7 @@ pub async fn from_substrait_rel(
                                         )?);
                                     Ok(LogicalPlan::TableScan(scan))
                                 }
-                                _ => Err(DataFusionError::Internal(
+                                _ => Err(DataFusionError::Plan(
                                     "unexpected plan for table".to_string(),
                                 )),
                             }
@@ -527,12 +527,12 @@ fn from_substrait_jointype(join_type: i32) -> Result<JoinType> {
             join_rel::JoinType::Outer => Ok(JoinType::Full),
             join_rel::JoinType::Anti => Ok(JoinType::LeftAnti),
             join_rel::JoinType::Semi => Ok(JoinType::LeftSemi),
-            _ => Err(DataFusionError::Internal(format!(
+            _ => Err(DataFusionError::Plan(format!(
                 "unsupported join type {substrait_join_type:?}"
             ))),
         }
     } else {
-        Err(DataFusionError::Internal(format!(
+        Err(DataFusionError::Plan(format!(
             "invalid join type variant {join_type:?}"
         )))
     }