You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/23 20:52:11 UTC

[arrow-datafusion] branch master updated: EXPLAIN ANALYZE should run all Optimizer passes (#929)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 1922130  EXPLAIN ANALYZE should run all Optimizer passes (#929)
1922130 is described below

commit 1922130f3d4776a3a0d85dbc73044e452dba2bee
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Aug 23 16:52:03 2021 -0400

    EXPLAIN ANALYZE should run all Optimizer passes (#929)
---
 datafusion/src/optimizer/utils.rs | 14 ++++++++++--
 datafusion/tests/sql.rs           | 45 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs
index 8ce6fe5..07e758d 100644
--- a/datafusion/src/optimizer/utils.rs
+++ b/datafusion/src/optimizer/utils.rs
@@ -195,11 +195,21 @@ pub fn from_plan(
             schema: schema.clone(),
             alias: alias.clone(),
         }),
+        LogicalPlan::Analyze {
+            verbose, schema, ..
+        } => {
+            assert!(expr.is_empty());
+            assert_eq!(inputs.len(), 1);
+            Ok(LogicalPlan::Analyze {
+                verbose: *verbose,
+                schema: schema.clone(),
+                input: Arc::new(inputs[0].clone()),
+            })
+        }
         LogicalPlan::EmptyRelation { .. }
         | LogicalPlan::TableScan { .. }
         | LogicalPlan::CreateExternalTable { .. }
-        | LogicalPlan::Explain { .. }
-        | LogicalPlan::Analyze { .. } => Ok(plan.clone()),
+        | LogicalPlan::Explain { .. } => Ok(plan.clone()),
     }
 }
 
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 0f38568..285a3a9 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -52,6 +52,28 @@ use datafusion::{
 };
 use datafusion::{execution::context::ExecutionContext, physical_plan::displayable};
 
+/// A macro to assert that one string is contained within another with
+/// a nice error message if they are not.
+///
+/// Usage: `assert_contains!(actual, expected)`
+///
+/// Is a macro so test error
+/// messages are on the same line as the failure;
+///
+/// Both arguments must be convertable into Strings (Into<String>)
+macro_rules! assert_contains {
+    ($ACTUAL: expr, $EXPECTED: expr) => {
+        let actual_value: String = $ACTUAL.into();
+        let expected_value: String = $EXPECTED.into();
+        assert!(
+            actual_value.contains(&expected_value),
+            "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}",
+            expected_value,
+            actual_value
+        );
+    };
+}
+
 #[tokio::test]
 async fn nyc() -> Result<()> {
     // schema for nyxtaxi csv files
@@ -2589,6 +2611,29 @@ async fn csv_explain_verbose_plans() {
     );
 }
 
+#[tokio::test]
+async fn explain_analyze_runs_optimizers() {
+    // repro for https://github.com/apache/arrow-datafusion/issues/917
+    // where EXPLAIN ANALYZE was not correctly running optiimizer
+    let mut ctx = ExecutionContext::new();
+    register_alltypes_parquet(&mut ctx);
+
+    // This happens as an optimization pass where count(*) can be
+    // answered using statistics only.
+    let expected = "EmptyExec: produce_one_row=true";
+
+    let sql = "EXPLAIN SELECT count(*) from alltypes_plain";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let actual = arrow::util::pretty::pretty_format_batches(&actual).unwrap();
+    assert_contains!(actual, expected);
+
+    // EXPLAIN ANALYZE should work the same
+    let sql = "EXPLAIN  ANALYZE SELECT count(*) from alltypes_plain";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let actual = arrow::util::pretty::pretty_format_batches(&actual).unwrap();
+    assert_contains!(actual, expected);
+}
+
 fn aggr_test_schema() -> SchemaRef {
     Arc::new(Schema::new(vec![
         Field::new("c1", DataType::Utf8, false),