You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/01/27 20:53:41 UTC
[arrow-datafusion] branch master updated: Show optimization errors in explain (#4819)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 7673fcc46 Show optimization errors in explain (#4819)
7673fcc46 is described below
commit 7673fcc464617d75c95f703f4815db810b30e7a3
Author: Jeffrey <22...@users.noreply.github.com>
AuthorDate: Sat Jan 28 07:53:34 2023 +1100
Show optimization errors in explain (#4819)
* Explain show physical optimization errors
* Revert "Explain show physical optimization errors"
This reverts commit cdd21be7a1c987958e0a9dff5c498080c6277e66.
* Don't fail explain if failure during planning
---
datafusion/core/src/execution/context.rs | 18 +++++--
datafusion/core/src/physical_plan/planner.rs | 79 +++++++++++++++++++---------
datafusion/expr/src/logical_plan/builder.rs | 1 +
datafusion/expr/src/logical_plan/plan.rs | 2 +
datafusion/optimizer/src/optimizer.rs | 22 +++++---
datafusion/sql/src/statement.rs | 1 +
6 files changed, 87 insertions(+), 36 deletions(-)
diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs
index 0705c9b5a..99a49d04d 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -27,7 +27,7 @@ use crate::{
optimizer::PhysicalOptimizerRule,
},
};
-use datafusion_expr::DescribeTable;
+use datafusion_expr::{DescribeTable, StringifiedPlan};
pub use datafusion_physical_expr::execution_props::ExecutionProps;
use datafusion_physical_expr::var_provider::is_system_variables;
use parking_lot::RwLock;
@@ -1766,7 +1766,7 @@ impl SessionState {
let mut stringified_plans = e.stringified_plans.clone();
// optimize the child plan, capturing the output of each optimizer
- let plan = self.optimizer.optimize(
+ let (plan, logical_optimization_succeeded) = match self.optimizer.optimize(
e.plan.as_ref(),
self,
|optimized_plan, optimizer| {
@@ -1774,13 +1774,23 @@ impl SessionState {
let plan_type = PlanType::OptimizedLogicalPlan { optimizer_name };
stringified_plans.push(optimized_plan.to_stringified(plan_type));
},
- )?;
+ ) {
+ Ok(plan) => (Arc::new(plan), true),
+ Err(DataFusionError::Context(optimizer_name, err)) => {
+ let plan_type = PlanType::OptimizedLogicalPlan { optimizer_name };
+ stringified_plans
+ .push(StringifiedPlan::new(plan_type, err.to_string()));
+ (e.plan.clone(), false)
+ }
+ Err(e) => return Err(e),
+ };
Ok(LogicalPlan::Explain(Explain {
verbose: e.verbose,
- plan: Arc::new(plan),
+ plan,
stringified_plans,
schema: e.schema.clone(),
+ logical_optimization_succeeded,
}))
} else {
self.optimizer.optimize(plan, self, |_, _| {})
diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs
index c607b9791..a3cd42ecb 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -63,9 +63,9 @@ use datafusion_expr::expr::{
Like, TryCast, WindowFunction,
};
use datafusion_expr::expr_rewriter::unnormalize_cols;
-use datafusion_expr::logical_plan;
use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary;
use datafusion_expr::utils::expand_wildcard;
+use datafusion_expr::{logical_plan, StringifiedPlan};
use datafusion_expr::{WindowFrame, WindowFrameBound};
use datafusion_optimizer::utils::unalias;
use datafusion_physical_expr::expressions::Literal;
@@ -1740,28 +1740,47 @@ impl DefaultPhysicalPlanner {
if !config.physical_plan_only {
stringified_plans = e.stringified_plans.clone();
- stringified_plans.push(e.plan.to_stringified(FinalLogicalPlan));
+ if e.logical_optimization_succeeded {
+ stringified_plans.push(e.plan.to_stringified(FinalLogicalPlan));
+ }
}
- if !config.logical_plan_only {
- let input = self
+ if !config.logical_plan_only && e.logical_optimization_succeeded {
+ match self
.create_initial_plan(e.plan.as_ref(), session_state)
- .await?;
-
- stringified_plans.push(
- displayable(input.as_ref()).to_stringified(InitialPhysicalPlan),
- );
-
- let input =
- self.optimize_internal(input, session_state, |plan, optimizer| {
- let optimizer_name = optimizer.name().to_string();
- let plan_type = OptimizedPhysicalPlan { optimizer_name };
- stringified_plans
- .push(displayable(plan).to_stringified(plan_type));
- })?;
+ .await
+ {
+ Ok(input) => {
+ stringified_plans.push(
+ displayable(input.as_ref())
+ .to_stringified(InitialPhysicalPlan),
+ );
- stringified_plans
- .push(displayable(input.as_ref()).to_stringified(FinalPhysicalPlan));
+ match self.optimize_internal(
+ input,
+ session_state,
+ |plan, optimizer| {
+ let optimizer_name = optimizer.name().to_string();
+ let plan_type = OptimizedPhysicalPlan { optimizer_name };
+ stringified_plans
+ .push(displayable(plan).to_stringified(plan_type));
+ },
+ ) {
+ Ok(input) => stringified_plans.push(
+ displayable(input.as_ref())
+ .to_stringified(FinalPhysicalPlan),
+ ),
+ Err(DataFusionError::Context(optimizer_name, e)) => {
+ let plan_type = OptimizedPhysicalPlan { optimizer_name };
+ stringified_plans
+ .push(StringifiedPlan::new(plan_type, e.to_string()))
+ }
+ Err(e) => return Err(e),
+ }
+ }
+ Err(e) => stringified_plans
+ .push(StringifiedPlan::new(InitialPhysicalPlan, e.to_string())),
+ }
}
Ok(Some(Arc::new(ExplainExec::new(
@@ -1795,14 +1814,22 @@ impl DefaultPhysicalPlanner {
let mut new_plan = plan;
for optimizer in optimizers {
let before_schema = new_plan.schema();
- new_plan = optimizer.optimize(new_plan, session_state.config_options())?;
+ new_plan = optimizer
+ .optimize(new_plan, session_state.config_options())
+ .map_err(|e| {
+ DataFusionError::Context(optimizer.name().to_string(), Box::new(e))
+ })?;
if optimizer.schema_check() && new_plan.schema() != before_schema {
- return Err(DataFusionError::Internal(format!(
- "PhysicalOptimizer rule '{}' failed, due to generate a different schema, original schema: {:?}, new schema: {:?}",
- optimizer.name(),
- before_schema,
- new_plan.schema()
- )));
+ let e = DataFusionError::Internal(format!(
+ "PhysicalOptimizer rule '{}' failed, due to generate a different schema, original schema: {:?}, new schema: {:?}",
+ optimizer.name(),
+ before_schema,
+ new_plan.schema()
+ ));
+ return Err(DataFusionError::Context(
+ optimizer.name().to_string(),
+ Box::new(e),
+ ));
}
trace!(
"Optimized physical plan by {}:\n{}\n",
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 95d808121..f6213d439 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -745,6 +745,7 @@ impl LogicalPlanBuilder {
plan: Arc::new(self.plan),
stringified_plans,
schema,
+ logical_optimization_succeeded: false,
})))
}
}
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index 785f84af1..08a687c0b 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -1658,6 +1658,8 @@ pub struct Explain {
pub stringified_plans: Vec<StringifiedPlan>,
/// The output schema of the explain (2 columns of text)
pub schema: DFSchemaRef,
+ /// Used by physical planner to check if should proceed with planning
+ pub logical_optimization_succeeded: bool,
}
/// Runs the actual plan, and then prints the physical plan with
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 7390ac204..b4eea1db8 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -268,12 +268,16 @@ impl Optimizer {
match result {
Ok(Some(plan)) => {
if !plan.schema().equivalent_names_and_types(new_plan.schema()) {
- return Err(DataFusionError::Internal(format!(
+ let e = DataFusionError::Internal(format!(
"Optimizer rule '{}' failed, due to generate a different schema, original schema: {:?}, new schema: {:?}",
rule.name(),
new_plan.schema(),
plan.schema()
- )));
+ ));
+ return Err(DataFusionError::Context(
+ rule.name().to_string(),
+ Box::new(e),
+ ));
}
new_plan = plan;
observer(&new_plan, rule.as_ref());
@@ -298,11 +302,15 @@ impl Optimizer {
e
);
} else {
- return Err(DataFusionError::Internal(format!(
+ let e = DataFusionError::Internal(format!(
"Optimizer rule '{}' failed due to unexpected error: {}",
rule.name(),
e
- )));
+ ));
+ return Err(DataFusionError::Context(
+ rule.name().to_string(),
+ Box::new(e),
+ ));
}
}
}
@@ -436,7 +444,8 @@ mod tests {
});
let err = opt.optimize(&plan, &config, &observe).unwrap_err();
assert_eq!(
- "Internal error: Optimizer rule 'bad rule' failed due to unexpected error: \
+ "bad rule\ncaused by\n\
+ Internal error: Optimizer rule 'bad rule' failed due to unexpected error: \
Error during planning: rule failed. This was likely caused by a bug in \
DataFusion's code and we would welcome that you file an bug report in our issue tracker",
err.to_string()
@@ -453,7 +462,8 @@ mod tests {
});
let err = opt.optimize(&plan, &config, &observe).unwrap_err();
assert_eq!(
- "Internal error: Optimizer rule 'get table_scan rule' failed, due to generate a different schema, \
+ "get table_scan rule\ncaused by\n\
+ Internal error: Optimizer rule 'get table_scan rule' failed, due to generate a different schema, \
original schema: DFSchema { fields: [], metadata: {} }, \
new schema: DFSchema { fields: [\
DFField { qualifier: Some(\"test\"), field: Field { name: \"a\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, \
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 33e9306e9..1428e92a5 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -508,6 +508,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
plan,
stringified_plans,
schema,
+ logical_optimization_succeeded: false,
}))
}
}