You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/11/17 15:55:01 UTC
[arrow-datafusion] branch master updated: add a checker to confirm optimizer can keep plan schema immutable. (#4233)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 5ed40e0d2 add a checker to confirm optimizer can keep plan schema immutable. (#4233)
5ed40e0d2 is described below
commit 5ed40e0d2b849e7591040e42c33b20188fc8a052
Author: jakevin <ja...@gmail.com>
AuthorDate: Thu Nov 17 23:54:55 2022 +0800
add a checker to confirm optimizer can keep plan schema immutable. (#4233)
* add a check to confirm optimizer can keep plan schema immutable.
* polish hint and add UT
---
datafusion/optimizer/src/optimizer.rs | 52 ++++++++++++++++++++++++++++++++++-
1 file changed, 51 insertions(+), 1 deletion(-)
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 1b0cd9c96..f09d2ee24 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -225,6 +225,14 @@ impl Optimizer {
let result = rule.try_optimize(&new_plan, optimizer_config);
match result {
Ok(Some(plan)) => {
+ if plan.schema() != new_plan.schema() {
+ return Err(DataFusionError::Internal(format!(
+ "Optimizer rule '{}' failed, due to generate a different schema, original schema: {:?}, new schema: {:?}",
+ rule.name(),
+ new_plan.schema(),
+ plan.schema()
+ )));
+ }
new_plan = plan;
observer(&new_plan, rule.as_ref());
log_plan(rule.name(), &new_plan);
@@ -282,10 +290,11 @@ fn log_plan(description: &str, plan: &LogicalPlan) {
#[cfg(test)]
mod tests {
use crate::optimizer::Optimizer;
+ use crate::test::test_table_scan;
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{DFSchema, DataFusionError};
use datafusion_expr::logical_plan::EmptyRelation;
- use datafusion_expr::LogicalPlan;
+ use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
use std::sync::Arc;
#[test]
@@ -318,6 +327,30 @@ mod tests {
Ok(())
}
+ #[test]
+ fn generate_different_schema() -> Result<(), DataFusionError> {
+ let opt = Optimizer::with_rules(vec![Arc::new(GetTableScanRule {})]);
+ let mut config = OptimizerConfig::new().with_skip_failing_rules(false);
+ let plan = LogicalPlan::EmptyRelation(EmptyRelation {
+ produce_one_row: false,
+ schema: Arc::new(DFSchema::empty()),
+ });
+ let result = opt.optimize(&plan, &mut config, &observe);
+ assert_eq!(
+ "Internal error: Optimizer rule 'get table_scan rule' failed, due to generate a different schema, \
+ original schema: DFSchema { fields: [], metadata: {} }, \
+ new schema: DFSchema { fields: [\
+ DFField { qualifier: Some(\"test\"), field: Field { name: \"a\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None } }, \
+ DFField { qualifier: Some(\"test\"), field: Field { name: \"b\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None } }, \
+ DFField { qualifier: Some(\"test\"), field: Field { name: \"c\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None } }], \
+ metadata: {} }. \
+ This was likely caused by a bug in DataFusion's code \
+ and we would welcome that you file an bug report in our issue tracker",
+ format!("{}", result.err().unwrap())
+ );
+ Ok(())
+ }
+
fn observe(_plan: &LogicalPlan, _rule: &dyn OptimizerRule) {}
struct BadRule {}
@@ -335,4 +368,21 @@ mod tests {
"bad rule"
}
}
+
+ struct GetTableScanRule {}
+
+ impl OptimizerRule for GetTableScanRule {
+ fn optimize(
+ &self,
+ _plan: &LogicalPlan,
+ _optimizer_config: &mut OptimizerConfig,
+ ) -> datafusion_common::Result<LogicalPlan> {
+ let table_scan = test_table_scan()?;
+ LogicalPlanBuilder::from(table_scan).build()
+ }
+
+ fn name(&self) -> &str {
+ "get table_scan rule"
+ }
+ }
}