You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/23 20:52:11 UTC
[arrow-datafusion] branch master updated: EXPLAIN ANALYZE should
run all Optimizer passes (#929)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 1922130 EXPLAIN ANALYZE should run all Optimizer passes (#929)
1922130 is described below
commit 1922130f3d4776a3a0d85dbc73044e452dba2bee
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Aug 23 16:52:03 2021 -0400
EXPLAIN ANALYZE should run all Optimizer passes (#929)
---
datafusion/src/optimizer/utils.rs | 14 ++++++++++--
datafusion/tests/sql.rs | 45 +++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs
index 8ce6fe5..07e758d 100644
--- a/datafusion/src/optimizer/utils.rs
+++ b/datafusion/src/optimizer/utils.rs
@@ -195,11 +195,21 @@ pub fn from_plan(
schema: schema.clone(),
alias: alias.clone(),
}),
+ LogicalPlan::Analyze {
+ verbose, schema, ..
+ } => {
+ assert!(expr.is_empty());
+ assert_eq!(inputs.len(), 1);
+ Ok(LogicalPlan::Analyze {
+ verbose: *verbose,
+ schema: schema.clone(),
+ input: Arc::new(inputs[0].clone()),
+ })
+ }
LogicalPlan::EmptyRelation { .. }
| LogicalPlan::TableScan { .. }
| LogicalPlan::CreateExternalTable { .. }
- | LogicalPlan::Explain { .. }
- | LogicalPlan::Analyze { .. } => Ok(plan.clone()),
+ | LogicalPlan::Explain { .. } => Ok(plan.clone()),
}
}
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 0f38568..285a3a9 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -52,6 +52,28 @@ use datafusion::{
};
use datafusion::{execution::context::ExecutionContext, physical_plan::displayable};
+/// A macro to assert that one string is contained within another with
+/// a nice error message if they are not.
+///
+/// Usage: `assert_contains!(actual, expected)`
+///
+/// This is a macro so that test error
+/// messages are reported on the same line as the failure.
+///
+/// Both arguments must be convertible into `String`s (`Into<String>`)
+macro_rules! assert_contains {
+ ($ACTUAL: expr, $EXPECTED: expr) => {
+ let actual_value: String = $ACTUAL.into();
+ let expected_value: String = $EXPECTED.into();
+ assert!(
+ actual_value.contains(&expected_value),
+ "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}",
+ expected_value,
+ actual_value
+ );
+ };
+}
+
#[tokio::test]
async fn nyc() -> Result<()> {
// schema for nyxtaxi csv files
@@ -2589,6 +2611,29 @@ async fn csv_explain_verbose_plans() {
);
}
+#[tokio::test]
+async fn explain_analyze_runs_optimizers() {
+ // repro for https://github.com/apache/arrow-datafusion/issues/917
+ // where EXPLAIN ANALYZE was not correctly running the optimizer
+ let mut ctx = ExecutionContext::new();
+ register_alltypes_parquet(&mut ctx);
+
+ // The expected plan node is produced by an optimization pass in which
+ // count(*) can be answered using statistics only.
+ let expected = "EmptyExec: produce_one_row=true";
+
+ let sql = "EXPLAIN SELECT count(*) from alltypes_plain";
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let actual = arrow::util::pretty::pretty_format_batches(&actual).unwrap();
+ assert_contains!(actual, expected);
+
+ // EXPLAIN ANALYZE should work the same
+ let sql = "EXPLAIN ANALYZE SELECT count(*) from alltypes_plain";
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let actual = arrow::util::pretty::pretty_format_batches(&actual).unwrap();
+ assert_contains!(actual, expected);
+}
+
fn aggr_test_schema() -> SchemaRef {
Arc::new(Schema::new(vec![
Field::new("c1", DataType::Utf8, false),