You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by xu...@apache.org on 2022/12/22 13:05:15 UTC
[arrow-datafusion] branch master updated: Add `--complete` auto completion mode to `sqllogictests` (#4665)
This is an automated email from the ASF dual-hosted git repository.
xudong963 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 04d095d8b Add `--complete` auto completion mode to `sqllogictests` (#4665)
04d095d8b is described below
commit 04d095d8bf933e8ea3066dbf1e1166169b0b92f6
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Thu Dec 22 08:05:10 2022 -0500
Add `--complete` auto completion mode to `sqllogictests` (#4665)
* Add `--complete` auto completion mode to `sqllogictests`
* clippy
* Apply suggestions from code review
Co-authored-by: xudong.w <wx...@gmail.com>
Co-authored-by: xudong.w <wx...@gmail.com>
---
datafusion/core/Cargo.toml | 2 +-
datafusion/core/tests/sql/aggregates.rs | 34 -----
datafusion/core/tests/sqllogictests/README.md | 17 ++-
datafusion/core/tests/sqllogictests/src/error.rs | 9 ++
datafusion/core/tests/sqllogictests/src/main.rs | 141 ++++++++++++---------
.../tests/sqllogictests/test_files/aggregate.slt | 14 ++
.../core/tests/sqllogictests/test_files/ddl.slt | 6 +
7 files changed, 130 insertions(+), 93 deletions(-)
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 898237a83..18f590168 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -109,7 +109,7 @@ doc-comment = "0.3"
env_logger = "0.10"
parquet-test-utils = { path = "../../parquet-test-utils" }
rstest = "0.16.0"
-sqllogictest = "0.9.0"
+sqllogictest = "0.10.0"
sqlparser = "0.28"
test-utils = { path = "../../test-utils" }
thiserror = "1.0.37"
diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
index ec44d28c6..a870dcc5b 100644
--- a/datafusion/core/tests/sql/aggregates.rs
+++ b/datafusion/core/tests/sql/aggregates.rs
@@ -1076,37 +1076,3 @@ async fn aggregate_with_alias() -> Result<()> {
);
Ok(())
}
-
-#[tokio::test]
-async fn array_agg_zero() -> Result<()> {
- let ctx = SessionContext::new();
- // 2 different aggregate functions: avg and sum(distinct)
- let sql = "SELECT ARRAY_AGG([]);";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+------------------------+",
- "| ARRAYAGG(List([NULL])) |",
- "+------------------------+",
- "| [] |",
- "+------------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn array_agg_one() -> Result<()> {
- let ctx = SessionContext::new();
- // 2 different aggregate functions: avg and sum(distinct)
- let sql = "SELECT ARRAY_AGG([1]);";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = vec![
- "+---------------------+",
- "| ARRAYAGG(List([1])) |",
- "+---------------------+",
- "| [[1]] |",
- "+---------------------+",
- ];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md
index 648e0a3ea..d47224578 100644
--- a/datafusion/core/tests/sqllogictests/README.md
+++ b/datafusion/core/tests/sqllogictests/README.md
@@ -21,7 +21,11 @@
This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a parser/runner of `.slt` files in `test_files`.
-#### Running tests
+#### Running tests: Validation Mode
+
+In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the file to the output produced by that run.
+
+Run all tests suites in validation mode
```shell
cargo test -p datafusion --test sqllogictests
@@ -40,6 +44,17 @@ Run only the tests in `information_schema.slt`:
cargo test -p datafusion --test sqllogictests -- information
```
+#### Updating tests: Completion Mode
+
+In test script completion mode, `sqllogictests` reads a prototype script and runs the statements and queries against the database engine. The output is is a full script that is a copy of the prototype script with result inserted.
+
+You can update tests by passing the `--complete` argument.
+
+```shell
+# Update ddl.slt with output from running
+cargo test -p datafusion --test sqllogictests -- ddl --complete
+```
+
#### sqllogictests
> :warning: **Warning**:Datafusion's sqllogictest implementation and migration is still in progress. Definitions taken from https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki
diff --git a/datafusion/core/tests/sqllogictests/src/error.rs b/datafusion/core/tests/sqllogictests/src/error.rs
index d645e054b..a7f252fe7 100644
--- a/datafusion/core/tests/sqllogictests/src/error.rs
+++ b/datafusion/core/tests/sqllogictests/src/error.rs
@@ -38,6 +38,9 @@ pub enum DFSqlLogicTestError {
/// Error from arrow-rs
#[error("Arrow error: {0}")]
Arrow(ArrowError),
+ /// Generic error
+ #[error("Other Error: {0}")]
+ Other(String),
}
impl From<TestError> for DFSqlLogicTestError {
@@ -63,3 +66,9 @@ impl From<ArrowError> for DFSqlLogicTestError {
DFSqlLogicTestError::Arrow(value)
}
}
+
+impl From<String> for DFSqlLogicTestError {
+ fn from(value: String) -> Self {
+ DFSqlLogicTestError::Other(value)
+ }
+}
diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
index e8c44babe..b89edcc56 100644
--- a/datafusion/core/tests/sqllogictests/src/main.rs
+++ b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -23,7 +23,7 @@ use log::info;
use normalize::convert_batches;
use sqllogictest::DBOutput;
use sqlparser::ast::Statement as SQLStatement;
-use std::path::{Path, PathBuf};
+use std::path::Path;
use std::time::Duration;
use crate::error::{DFSqlLogicTestError, Result};
@@ -78,16 +78,19 @@ pub async fn main() -> Result<()> {
#[cfg(not(target_family = "windows"))]
pub async fn main() -> Result<()> {
// Enable logging (e.g. set RUST_LOG=debug to see debug logs)
+
+ use sqllogictest::{default_validator, update_test_file};
env_logger::init();
- // run each file using its own new DB
- //
- // Note: can't use tester.run_parallel_async()
- // as that will reuse the same SessionContext
- //
- // We could run these tests in parallel eventually if we wanted.
+ let options = Options::new();
+
+ // default to all files in test directory filtering based on name
+ let files: Vec<_> = std::fs::read_dir(TEST_DIRECTORY)
+ .unwrap()
+ .map(|path| path.unwrap().path())
+ .filter(|path| options.check_test_file(path.as_path()))
+ .collect();
- let files = get_test_files();
info!("Running test files {:?}", files);
for path in files {
@@ -95,61 +98,29 @@ pub async fn main() -> Result<()> {
let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
+ // Create the test runner
let ctx = context_for_test_file(&file_name).await;
-
- let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
- tester.run_file_async(path).await?;
+ let mut runner = sqllogictest::Runner::new(DataFusion { ctx, file_name });
+
+ // run each file using its own new DB
+ //
+ // We could run these tests in parallel eventually if we wanted.
+ if options.complete_mode {
+ info!("Using complete mode to complete {}", path.display());
+ let col_separator = " ";
+ let validator = default_validator;
+ update_test_file(path, runner, col_separator, validator)
+ .await
+ .map_err(|e| e.to_string())?;
+ } else {
+ // run the test normally:
+ runner.run_file_async(path).await?;
+ }
}
Ok(())
}
-/// Gets a list of test files to execute. If there were arguments
-/// passed to the program treat it as a cargo test filter (substring match on filenames)
-fn get_test_files() -> Vec<PathBuf> {
- info!("Test directory: {}", TEST_DIRECTORY);
-
- let args: Vec<_> = std::env::args().collect();
-
- // treat args after the first as filters to run (substring matching)
- let filters = if !args.is_empty() {
- args.iter()
- .skip(1)
- .map(|arg| arg.as_str())
- .collect::<Vec<_>>()
- } else {
- vec![]
- };
-
- // default to all files in test directory filtering based on name
- std::fs::read_dir(TEST_DIRECTORY)
- .unwrap()
- .map(|path| path.unwrap().path())
- .filter(|path| check_test_file(&filters, path.as_path()))
- .collect()
-}
-
-/// because this test can be run as a cargo test, commands like
-///
-/// ```shell
-/// cargo test foo
-/// ```
-///
-/// Will end up passing `foo` as a command line argument.
-///
-/// be compatible with this, treat the command line arguments as a
-/// filter and that does a substring match on each input.
-/// returns true f this path should be run
-fn check_test_file(filters: &[&str], path: &Path) -> bool {
- if filters.is_empty() {
- return true;
- }
-
- // otherwise check if any filter matches
- let path_str = path.to_string_lossy();
- filters.iter().any(|filter| path_str.contains(filter))
-}
-
/// Create a SessionContext, configured for the specific test
async fn context_for_test_file(file_name: &str) -> SessionContext {
match file_name {
@@ -189,3 +160,59 @@ async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DBOut
let formatted_batches = convert_batches(results)?;
Ok(formatted_batches)
}
+
+/// Parsed command line options
+struct Options {
+ // regex like
+ /// arguments passed to the program which are treated as
+ /// cargo test filter (substring match on filenames)
+ filters: Vec<String>,
+
+ /// Auto complete mode to fill out expected results
+ complete_mode: bool,
+}
+
+impl Options {
+ fn new() -> Self {
+ let args: Vec<_> = std::env::args().collect();
+
+ let complete_mode = args.iter().any(|a| a == "--complete");
+
+ // treat args after the first as filters to run (substring matching)
+ let filters = if !args.is_empty() {
+ args.into_iter()
+ .skip(1)
+ // ignore command line arguments like `--complete`
+ .filter(|arg| !arg.as_str().starts_with("--"))
+ .collect::<Vec<_>>()
+ } else {
+ vec![]
+ };
+
+ Self {
+ filters,
+ complete_mode,
+ }
+ }
+
+ /// Because this test can be run as a cargo test, commands like
+ ///
+ /// ```shell
+ /// cargo test foo
+ /// ```
+ ///
+ /// Will end up passing `foo` as a command line argument.
+ ///
+ /// To be compatible with this, treat the command line arguments as a
+ /// filter and that does a substring match on each input. returns
+ /// true f this path should be run
+ fn check_test_file(&self, path: &Path) -> bool {
+ if self.filters.is_empty() {
+ return true;
+ }
+
+ // otherwise check if any filter matches
+ let path_str = path.to_string_lossy();
+ self.filters.iter().any(|filter| path_str.contains(filter))
+ }
+}
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 90241e3e8..7a1b012b8 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1077,3 +1077,17 @@ b 5
c 4
d 4
e 4
+
+
+
+# array_agg_zero
+query U
+SELECT ARRAY_AGG([]);
+----
+[]
+
+# array_agg_one
+query U
+SELECT ARRAY_AGG([1]);
+----
+[[1]]
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index 7445157ee..2e0667b58 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -411,5 +411,11 @@ SELECT * FROM aggregate_simple order by c1 LIMIT 1;
----
0.00001 0.000000000001 true
+query CCC
+SELECT * FROM aggregate_simple order by c1 DESC LIMIT 1;
+----
+0.00005 0.000000000005 true
+
+
statement ok
DROP TABLE aggregate_simple