You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/05/31 10:16:50 UTC
[arrow-datafusion] branch master updated: include test data and add
aggregation tests in integration test (#425)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 80abb09 include test data and add aggregation tests in integration test (#425)
80abb09 is described below
commit 80abb09e4ee89f670980e02bb7e3fbd457a916d0
Author: Jiayu Liu <Ji...@users.noreply.github.com>
AuthorDate: Mon May 31 18:16:44 2021 +0800
include test data and add aggregation tests in integration test (#425)
* include test data
* bump
---
.github/workflows/rust.yml | 19 ++++++++++++++-
datafusion-cli/src/main.rs | 24 +++++++++++--------
integration-tests/create_test_table.sql | 34 +++++++++++++++++++++++++++
integration-tests/sqls/simple_aggregation.sql | 24 +++++++++++++++++++
integration-tests/sqls/simple_group_by.sql | 27 +++++++++++++++++++++
integration-tests/test_psql_parity.py | 6 ++++-
6 files changed, 122 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index f492b2e..933b513 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -155,6 +155,8 @@ jobs:
--health-retries 5
steps:
- uses: actions/checkout@v2
+ with:
+ submodules: true
- uses: actions/setup-python@v2
with:
python-version: "3.8"
@@ -167,7 +169,22 @@ jobs:
# make sure psql can access the server
echo "$POSTGRES_HOST:$POSTGRES_PORT:$POSTGRES_DB:$POSTGRES_USER:$POSTGRES_PASSWORD" | tee ~/.pgpass
chmod 0600 ~/.pgpass
- psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c 'select now() as now'
+ psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c 'CREATE TABLE IF NOT EXISTS test (
+ c1 character varying NOT NULL,
+ c2 integer NOT NULL,
+ c3 smallint NOT NULL,
+ c4 smallint NOT NULL,
+ c5 integer NOT NULL,
+ c6 bigint NOT NULL,
+ c7 smallint NOT NULL,
+ c8 integer NOT NULL,
+ c9 bigint NOT NULL,
+ c10 character varying NOT NULL,
+ c11 double precision NOT NULL,
+ c12 double precision NOT NULL,
+ c13 character varying NOT NULL
+ );'
+ psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c "\copy test FROM '$(pwd)/testing/data/csv/aggregate_test_100.csv' WITH (FORMAT csv, HEADER true);"
env:
POSTGRES_HOST: localhost
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index 5b35880..083710f 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -58,9 +58,10 @@ pub async fn main() {
)
.arg(
Arg::with_name("file")
- .help("Execute commands from file, then exit")
+ .help("Execute commands from file(s), then exit")
.short("f")
.long("file")
+ .multiple(true)
.validator(is_valid_file)
.takes_value(true),
)
@@ -112,22 +113,25 @@ pub async fn main() {
let quiet = matches.is_present("quiet");
let print_options = PrintOptions { format, quiet };
- if let Some(file_path) = matches.value_of("file") {
- let file = File::open(file_path)
- .unwrap_or_else(|err| panic!("cannot open file '{}': {}", file_path, err));
- let mut reader = BufReader::new(file);
- exec_from_lines(&mut reader, execution_config, print_options).await;
+ if let Some(file_paths) = matches.values_of("file") {
+ let files = file_paths
+ .map(|file_path| File::open(file_path).unwrap())
+ .collect::<Vec<_>>();
+ let mut ctx = ExecutionContext::with_config(execution_config);
+ for file in files {
+ let mut reader = BufReader::new(file);
+ exec_from_lines(&mut ctx, &mut reader, print_options.clone()).await;
+ }
} else {
exec_from_repl(execution_config, print_options).await;
}
}
async fn exec_from_lines(
+ ctx: &mut ExecutionContext,
reader: &mut BufReader<File>,
- execution_config: ExecutionConfig,
print_options: PrintOptions,
) {
- let mut ctx = ExecutionContext::with_config(execution_config);
let mut query = "".to_owned();
for line in reader.lines() {
@@ -139,7 +143,7 @@ async fn exec_from_lines(
let line = line.trim_end();
query.push_str(line);
if line.ends_with(';') {
- match exec_and_print(&mut ctx, print_options.clone(), query).await {
+ match exec_and_print(ctx, print_options.clone(), query).await {
Ok(_) => {}
Err(err) => println!("{:?}", err),
}
@@ -156,7 +160,7 @@ async fn exec_from_lines(
// run the left over query if the last statement doesn't contain ‘;’
if !query.is_empty() {
- match exec_and_print(&mut ctx, print_options, query).await {
+ match exec_and_print(ctx, print_options, query).await {
Ok(_) => {}
Err(err) => println!("{:?}", err),
}
diff --git a/integration-tests/create_test_table.sql b/integration-tests/create_test_table.sql
new file mode 100644
index 0000000..89b0861
--- /dev/null
+++ b/integration-tests/create_test_table.sql
@@ -0,0 +1,34 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+
+-- http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+CREATE EXTERNAL TABLE test (
+ c1 VARCHAR NOT NULL,
+ c2 INT NOT NULL,
+ c3 SMALLINT NOT NULL,
+ c4 SMALLINT NOT NULL,
+ c5 INT NOT NULL,
+ c6 BIGINT NOT NULL,
+ c7 SMALLINT NOT NULL,
+ c8 INT NOT NULL,
+ c9 BIGINT NOT NULL,
+ c10 VARCHAR NOT NULL,
+ c11 FLOAT NOT NULL,
+ c12 DOUBLE NOT NULL,
+ c13 VARCHAR NOT NULL
+)
+STORED AS CSV
+WITH HEADER ROW
+LOCATION 'testing/data/csv/aggregate_test_100.csv';
diff --git a/integration-tests/sqls/simple_aggregation.sql b/integration-tests/sqls/simple_aggregation.sql
new file mode 100644
index 0000000..cbe37ed
--- /dev/null
+++ b/integration-tests/sqls/simple_aggregation.sql
@@ -0,0 +1,24 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+
+-- http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+SELECT
+ count(*) AS count_all,
+ count(c3) AS count_c3,
+ avg(c3) AS avg,
+ sum(c3) AS sum,
+ max(c3) AS max,
+ min(c3) AS min
+FROM test;
diff --git a/integration-tests/sqls/simple_group_by.sql b/integration-tests/sqls/simple_group_by.sql
new file mode 100644
index 0000000..11fe1cc
--- /dev/null
+++ b/integration-tests/sqls/simple_group_by.sql
@@ -0,0 +1,27 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+
+-- http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+
+select
+ c2,
+ sum(c3) sum_c3,
+ avg(c3) avg_c3,
+ max(c3) max_c3,
+ min(c3) min_c3,
+ count(c3) count_c3
+from test
+group by c2
+order by c2;
diff --git a/integration-tests/test_psql_parity.py b/integration-tests/test_psql_parity.py
index 204f906..f4967b8 100644
--- a/integration-tests/test_psql_parity.py
+++ b/integration-tests/test_psql_parity.py
@@ -32,12 +32,16 @@ pg_db, pg_user, pg_host, pg_port = [
)
]
+CREATE_TABLE_SQL_FILE = "integration-tests/create_test_table.sql"
+
def generate_csv_from_datafusion(fname: str):
return subprocess.check_output(
[
"./target/debug/datafusion-cli",
"-f",
+ CREATE_TABLE_SQL_FILE,
+ "-f",
fname,
"--format",
"csv",
@@ -70,7 +74,7 @@ class PsqlParityTest(unittest.TestCase):
def test_parity(self):
root = Path(os.path.dirname(__file__)) / "sqls"
files = set(root.glob("*.sql"))
- self.assertEqual(len(files), 2, msg="tests are missed")
+ self.assertEqual(len(files), 4, msg="tests are missed")
for fname in files:
with self.subTest(fname=fname):
datafusion_output = pd.read_csv(