You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by xu...@apache.org on 2022/12/02 14:47:36 UTC

[arrow-datafusion] branch master updated: Drive sqllogictest runner on directory contents rather than hard coded list (#4472)

This is an automated email from the ASF dual-hosted git repository.

xudong963 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new b229e0ff8 Drive sqllogictest runner on directory contents rather than hard coded list (#4472)
b229e0ff8 is described below

commit b229e0ff8ac3783adec0e2b4020e95971b147be4
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Fri Dec 2 09:47:30 2022 -0500

    Drive sqllogictest runner on directory contents rather than hard coded list (#4472)
    
    * Update sqllogictest runner based on files
    
    * port some tests
    
    * fmt
    
    * improve comments
    
    * fix compilation on windows
---
 datafusion/core/tests/sql/information_schema.rs    |  95 -----------------
 datafusion/core/tests/sqllogictests/src/main.rs    | 115 +++++++++++----------
 .../test_files/information_schema.slt              |  61 +++++++++++
 3 files changed, 123 insertions(+), 148 deletions(-)

diff --git a/datafusion/core/tests/sql/information_schema.rs b/datafusion/core/tests/sql/information_schema.rs
index 75b252ac1..24652f67b 100644
--- a/datafusion/core/tests/sql/information_schema.rs
+++ b/datafusion/core/tests/sql/information_schema.rs
@@ -669,98 +669,3 @@ async fn show_external_create_table() {
 async fn plan_and_collect(ctx: &SessionContext, sql: &str) -> Result<Vec<RecordBatch>> {
     ctx.sql(sql).await?.collect().await
 }
-
-#[tokio::test]
-async fn show_variable_in_config_options() {
-    let ctx =
-        SessionContext::with_config(SessionConfig::new().with_information_schema(true));
-    let sql = "SHOW datafusion.execution.batch_size";
-    let results = plan_and_collect(&ctx, sql).await.unwrap();
-
-    let expected = vec![
-        "+---------------------------------+---------+",
-        "| name                            | setting |",
-        "+---------------------------------+---------+",
-        "| datafusion.execution.batch_size | 8192    |",
-        "+---------------------------------+---------+",
-    ];
-
-    assert_batches_eq!(expected, &results);
-}
-
-#[tokio::test]
-async fn show_all() {
-    let ctx =
-        SessionContext::with_config(SessionConfig::new().with_information_schema(true));
-    let sql = "SHOW ALL";
-
-    let results = plan_and_collect(&ctx, sql).await.unwrap();
-
-    // Has all the default values, should be in order by name
-    let expected = vec![
-        "+-----------------------------------------------------------+---------+",
-        "| name                                                      | setting |",
-        "+-----------------------------------------------------------+---------+",
-        "| datafusion.catalog.location                               | NULL    |",
-        "| datafusion.catalog.type                                   | NULL    |",
-        "| datafusion.execution.batch_size                           | 8192    |",
-        "| datafusion.execution.coalesce_batches                     | true    |",
-        "| datafusion.execution.coalesce_target_batch_size           | 4096    |",
-        "| datafusion.execution.parquet.enable_page_index            | false   |",
-        "| datafusion.execution.parquet.metadata_size_hint           | NULL    |",
-        "| datafusion.execution.parquet.pruning                      | true    |",
-        "| datafusion.execution.parquet.pushdown_filters             | false   |",
-        "| datafusion.execution.parquet.reorder_filters              | false   |",
-        "| datafusion.execution.parquet.skip_metadata                | true    |",
-        "| datafusion.execution.time_zone                            | +00:00  |",
-        "| datafusion.explain.logical_plan_only                      | false   |",
-        "| datafusion.explain.physical_plan_only                     | false   |",
-        "| datafusion.optimizer.filter_null_join_keys                | false   |",
-        "| datafusion.optimizer.hash_join_single_partition_threshold | 1048576 |",
-        "| datafusion.optimizer.max_passes                           | 3       |",
-        "| datafusion.optimizer.prefer_hash_join                     | true    |",
-        "| datafusion.optimizer.skip_failed_rules                    | true    |",
-        "| datafusion.optimizer.top_down_join_key_reordering         | true    |",
-        "+-----------------------------------------------------------+---------+",
-    ];
-
-    assert_batches_eq!(expected, &results);
-}
-
-#[tokio::test]
-async fn show_time_zone_default_utc() {
-    // https://github.com/apache/arrow-datafusion/issues/3255
-    let ctx =
-        SessionContext::with_config(SessionConfig::new().with_information_schema(true));
-    let sql = "SHOW TIME ZONE";
-    let results = plan_and_collect(&ctx, sql).await.unwrap();
-
-    let expected = vec![
-        "+--------------------------------+---------+",
-        "| name                           | setting |",
-        "+--------------------------------+---------+",
-        "| datafusion.execution.time_zone | +00:00  |",
-        "+--------------------------------+---------+",
-    ];
-
-    assert_batches_eq!(expected, &results);
-}
-
-#[tokio::test]
-async fn show_timezone_default_utc() {
-    // https://github.com/apache/arrow-datafusion/issues/3255
-    let ctx =
-        SessionContext::with_config(SessionConfig::new().with_information_schema(true));
-    let sql = "SHOW TIMEZONE";
-    let results = plan_and_collect(&ctx, sql).await.unwrap();
-
-    let expected = vec![
-        "+--------------------------------+---------+",
-        "| name                           | setting |",
-        "+--------------------------------+---------+",
-        "| datafusion.execution.time_zone | +00:00  |",
-        "+--------------------------------+---------+",
-    ];
-
-    assert_batches_eq!(expected, &results);
-}
diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
index 4d2f54222..fc27773c0 100644
--- a/datafusion/core/tests/sqllogictests/src/main.rs
+++ b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -18,8 +18,8 @@
 use async_trait::async_trait;
 use datafusion::arrow::csv::WriterBuilder;
 use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::prelude::SessionContext;
-use std::path::PathBuf;
+use datafusion::prelude::{SessionConfig, SessionContext};
+use std::path::Path;
 use std::time::Duration;
 
 use sqllogictest::TestError;
@@ -29,41 +29,10 @@ mod setup;
 mod utils;
 
 const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files";
-const TEST_CATEGORIES: [TestCategory; 2] =
-    [TestCategory::Aggregate, TestCategory::ArrowTypeOf];
-
-pub enum TestCategory {
-    Aggregate,
-    ArrowTypeOf,
-}
-
-impl TestCategory {
-    fn as_str(&self) -> &'static str {
-        match self {
-            TestCategory::Aggregate => "Aggregate",
-            TestCategory::ArrowTypeOf => "ArrowTypeOf",
-        }
-    }
-
-    fn test_filename(&self) -> &'static str {
-        match self {
-            TestCategory::Aggregate => "aggregate.slt",
-            TestCategory::ArrowTypeOf => "arrow_typeof.slt",
-        }
-    }
-
-    async fn register_test_tables(&self, ctx: &SessionContext) {
-        println!("[{}] Registering tables", self.as_str());
-        match self {
-            TestCategory::Aggregate => setup::register_aggregate_tables(ctx).await,
-            TestCategory::ArrowTypeOf => (),
-        }
-    }
-}
 
 pub struct DataFusion {
     ctx: SessionContext,
-    test_category: TestCategory,
+    file_name: String,
 }
 
 #[async_trait]
@@ -71,11 +40,7 @@ impl sqllogictest::AsyncDB for DataFusion {
     type Error = TestError;
 
     async fn run(&mut self, sql: &str) -> Result<String> {
-        println!(
-            "[{}] Running query: \"{}\"",
-            self.test_category.as_str(),
-            sql
-        );
+        println!("[{}] Running query: \"{}\"", self.file_name, sql);
         let result = run_query(&self.ctx, sql).await?;
         Ok(result)
     }
@@ -96,26 +61,70 @@ impl sqllogictest::AsyncDB for DataFusion {
 }
 
 #[tokio::main]
+#[cfg(target_family = "windows")]
 pub async fn main() -> Result<()> {
-    for test_category in TEST_CATEGORIES {
-        let filename = PathBuf::from(format!(
-            "{}/{}",
-            TEST_DIRECTORY,
-            test_category.test_filename()
-        ));
-        let ctx = SessionContext::new();
-        test_category.register_test_tables(&ctx).await;
-
-        if !cfg!(target_os = "windows") {
-            let mut tester = sqllogictest::Runner::new(DataFusion { ctx, test_category });
-            // TODO: use tester.run_parallel_async()
-            tester.run_file_async(filename).await?;
-        }
+    println!("Skipping test on windows");
+    Ok(())
+}
+
+#[tokio::main]
+#[cfg(not(target_family = "windows"))]
+pub async fn main() -> Result<()> {
+    let paths = std::fs::read_dir(TEST_DIRECTORY).unwrap();
+
+    // run each file using its own new SessionContext
+    //
+    // Note: can't use tester.run_parallel_async()
+    // as that will reuse the same SessionContext
+    //
+    // We could run these tests in parallel eventually if we wanted.
+
+    for path in paths {
+        // TODO better error handling
+        let path = path.unwrap().path();
+
+        run_file(&path).await?;
     }
 
     Ok(())
 }
 
+/// Run the tests in the specified `.slt` file
+async fn run_file(path: &Path) -> Result<()> {
+    println!("Running: {}", path.display());
+
+    let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
+
+    let ctx = context_for_test_file(&file_name).await;
+
+    let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
+    tester.run_file_async(path).await?;
+
+    Ok(())
+}
+
+/// Create a SessionContext, configured for the specific test
+async fn context_for_test_file(file_name: &str) -> SessionContext {
+    match file_name {
+        "aggregate.slt" => {
+            println!("Registering aggregate tables");
+            let ctx = SessionContext::new();
+            setup::register_aggregate_tables(&ctx).await;
+            ctx
+        }
+        "information_schema.slt" => {
+            println!("Enabling information schema");
+            SessionContext::with_config(
+                SessionConfig::new().with_information_schema(true),
+            )
+        }
+        _ => {
+            println!("Using default SessionContext");
+            SessionContext::new()
+        }
+    }
+}
+
 fn format_batches(batches: &[RecordBatch]) -> Result<String> {
     let mut bytes = vec![];
     {
diff --git a/datafusion/core/tests/sqllogictests/test_files/information_schema.slt b/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
new file mode 100644
index 000000000..5ec28b162
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# show all variables
+query R
+SHOW ALL
+----
+datafusion.catalog.location NULL
+datafusion.catalog.type NULL
+datafusion.execution.batch_size 8192
+datafusion.execution.coalesce_batches true
+datafusion.execution.coalesce_target_batch_size 4096
+datafusion.execution.parquet.enable_page_index false
+datafusion.execution.parquet.metadata_size_hint NULL
+datafusion.execution.parquet.pruning true
+datafusion.execution.parquet.pushdown_filters false
+datafusion.execution.parquet.reorder_filters false
+datafusion.execution.parquet.skip_metadata true
+datafusion.execution.time_zone +00:00
+datafusion.explain.logical_plan_only false
+datafusion.explain.physical_plan_only false
+datafusion.optimizer.filter_null_join_keys false
+datafusion.optimizer.hash_join_single_partition_threshold 1048576
+datafusion.optimizer.max_passes 3
+datafusion.optimizer.prefer_hash_join true
+datafusion.optimizer.skip_failed_rules true
+datafusion.optimizer.top_down_join_key_reordering true
+
+# show_variable_in_config_options
+query R
+SHOW datafusion.execution.batch_size
+----
+datafusion.execution.batch_size 8192
+
+# show_time_zone_default_utc
+# https://github.com/apache/arrow-datafusion/issues/3255
+query R
+SHOW TIME ZONE
+----
+datafusion.execution.time_zone +00:00
+
+# show_timezone_default_utc
+# https://github.com/apache/arrow-datafusion/issues/3255
+query R
+SHOW TIMEZONE
+----
+datafusion.execution.time_zone +00:00
\ No newline at end of file