You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/11/17 15:55:01 UTC

[arrow-datafusion] branch master updated: add a checker to confirm optimizer can keep plan schema immutable. (#4233)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ed40e0d2 add a checker to confirm optimizer can keep plan schema immutable. (#4233)
5ed40e0d2 is described below

commit 5ed40e0d2b849e7591040e42c33b20188fc8a052
Author: jakevin <ja...@gmail.com>
AuthorDate: Thu Nov 17 23:54:55 2022 +0800

    add a checker to confirm optimizer can keep plan schema immutable. (#4233)
    
    * add a check to confirm optimizer can keep plan schema immutable.
    
    * polish hint and add UT
---
 datafusion/optimizer/src/optimizer.rs | 52 ++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 1b0cd9c96..f09d2ee24 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -225,6 +225,14 @@ impl Optimizer {
                 let result = rule.try_optimize(&new_plan, optimizer_config);
                 match result {
                     Ok(Some(plan)) => {
+                        if plan.schema() != new_plan.schema() {
+                            return Err(DataFusionError::Internal(format!(
+                                "Optimizer rule '{}' failed, due to generate a different schema, original schema: {:?}, new schema: {:?}",
+                                rule.name(),
+                                new_plan.schema(),
+                                plan.schema()
+                            )));
+                        }
                         new_plan = plan;
                         observer(&new_plan, rule.as_ref());
                         log_plan(rule.name(), &new_plan);
@@ -282,10 +290,11 @@ fn log_plan(description: &str, plan: &LogicalPlan) {
 #[cfg(test)]
 mod tests {
     use crate::optimizer::Optimizer;
+    use crate::test::test_table_scan;
     use crate::{OptimizerConfig, OptimizerRule};
     use datafusion_common::{DFSchema, DataFusionError};
     use datafusion_expr::logical_plan::EmptyRelation;
-    use datafusion_expr::LogicalPlan;
+    use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
     use std::sync::Arc;
 
     #[test]
@@ -318,6 +327,30 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn generate_different_schema() -> Result<(), DataFusionError> {
+        let opt = Optimizer::with_rules(vec![Arc::new(GetTableScanRule {})]);
+        let mut config = OptimizerConfig::new().with_skip_failing_rules(false);
+        let plan = LogicalPlan::EmptyRelation(EmptyRelation {
+            produce_one_row: false,
+            schema: Arc::new(DFSchema::empty()),
+        });
+        let result = opt.optimize(&plan, &mut config, &observe);
+        assert_eq!(
+            "Internal error: Optimizer rule 'get table_scan rule' failed, due to generate a different schema, \
+             original schema: DFSchema { fields: [], metadata: {} }, \
+             new schema: DFSchema { fields: [\
+             DFField { qualifier: Some(\"test\"), field: Field { name: \"a\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None } }, \
+             DFField { qualifier: Some(\"test\"), field: Field { name: \"b\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None } }, \
+             DFField { qualifier: Some(\"test\"), field: Field { name: \"c\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None } }], \
+             metadata: {} }. \
+             This was likely caused by a bug in DataFusion's code \
+             and we would welcome that you file an bug report in our issue tracker",
+            format!("{}", result.err().unwrap())
+        );
+        Ok(())
+    }
+
     fn observe(_plan: &LogicalPlan, _rule: &dyn OptimizerRule) {}
 
     struct BadRule {}
@@ -335,4 +368,21 @@ mod tests {
             "bad rule"
         }
     }
+
+    struct GetTableScanRule {}
+
+    impl OptimizerRule for GetTableScanRule {
+        fn optimize(
+            &self,
+            _plan: &LogicalPlan,
+            _optimizer_config: &mut OptimizerConfig,
+        ) -> datafusion_common::Result<LogicalPlan> {
+            let table_scan = test_table_scan()?;
+            LogicalPlanBuilder::from(table_scan).build()
+        }
+
+        fn name(&self) -> &str {
+            "get table_scan rule"
+        }
+    }
 }