You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/07/06 19:09:30 UTC
[arrow-datafusion] branch master updated: MINOR: Add documentation for running integration tests (#2839)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 06450e8c7 MINOR: Add documentation for running integration tests (#2839)
06450e8c7 is described below
commit 06450e8c71f983bf4561703b0aaeb57b96b7ea93
Author: Andy Grove <ag...@apache.org>
AuthorDate: Wed Jul 6 12:09:24 2022 -0700
MINOR: Add documentation for running integration tests (#2839)
* make env vars optional
* Document how to run integration tests
* fix path
* fix
* asf header
* fix doc text
---
.github/workflows/rust.yml | 16 +-------
integration-tests/README.md | 49 ++++++++++++++++++++++++
integration-tests/create_test_table_postgres.sql | 15 ++++++++
integration-tests/test_psql_parity.py | 39 +++++++++++--------
4 files changed, 87 insertions(+), 32 deletions(-)
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 216123132..b534ed224 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -170,21 +170,7 @@ jobs:
# make sure psql can access the server
echo "$POSTGRES_HOST:$POSTGRES_PORT:$POSTGRES_DB:$POSTGRES_USER:$POSTGRES_PASSWORD" | tee ~/.pgpass
chmod 0600 ~/.pgpass
- psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c 'CREATE TABLE IF NOT EXISTS test (
- c1 character varying NOT NULL,
- c2 integer NOT NULL,
- c3 smallint NOT NULL,
- c4 smallint NOT NULL,
- c5 integer NOT NULL,
- c6 bigint NOT NULL,
- c7 smallint NOT NULL,
- c8 integer NOT NULL,
- c9 bigint NOT NULL,
- c10 character varying NOT NULL,
- c11 double precision NOT NULL,
- c12 double precision NOT NULL,
- c13 character varying NOT NULL
- );'
+ psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -f integration-tests/create_test_table_postgres.sql
psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c "\copy test FROM '$(pwd)/testing/data/csv/aggregate_test_100.csv' WITH (FORMAT csv, HEADER true);"
env:
POSTGRES_HOST: localhost
diff --git a/integration-tests/README.md b/integration-tests/README.md
new file mode 100644
index 000000000..b97c9ac61
--- /dev/null
+++ b/integration-tests/README.md
@@ -0,0 +1,49 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# DataFusion Integration Tests
+
+These tests run SQL queries against both DataFusion and Postgres and compare the results for parity.
+
+## Setup
+
+Set the following environment variables as appropriate for your environment. They are all optional.
+
+- `POSTGRES_DB`
+- `POSTGRES_USER`
+- `POSTGRES_HOST`
+- `POSTGRES_PORT`
+
+Create a Postgres database, then create the test table by running this command from the `integration-tests` directory (the script path is relative to it):
+
+```bash
+psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
+ -f create_test_table_postgres.sql
+```
+
+Populate the table by running this command from the root of the repository (the CSV path is resolved relative to the current directory):
+
+```bash
+psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
+ -c "\copy test FROM '$(pwd)/testing/data/csv/aggregate_test_100.csv' WITH (FORMAT csv, HEADER true);"
+```
+
+## Run Tests
+
+Run `pytest` from the root of the repository.
diff --git a/integration-tests/create_test_table_postgres.sql b/integration-tests/create_test_table_postgres.sql
new file mode 100644
index 000000000..7cc154b1f
--- /dev/null
+++ b/integration-tests/create_test_table_postgres.sql
@@ -0,0 +1,15 @@
+CREATE TABLE IF NOT EXISTS test (
+c1 character varying NOT NULL,
+c2 integer NOT NULL,
+c3 smallint NOT NULL,
+c4 smallint NOT NULL,
+c5 integer NOT NULL,
+c6 bigint NOT NULL,
+c7 smallint NOT NULL,
+c8 integer NOT NULL,
+c9 bigint NOT NULL,
+c10 character varying NOT NULL,
+c11 double precision NOT NULL,
+c12 double precision NOT NULL,
+c13 character varying NOT NULL
+);
\ No newline at end of file
diff --git a/integration-tests/test_psql_parity.py b/integration-tests/test_psql_parity.py
index f1c6bf6a5..506100bbc 100644
--- a/integration-tests/test_psql_parity.py
+++ b/integration-tests/test_psql_parity.py
@@ -52,24 +52,29 @@ def generate_csv_from_datafusion(fname: str):
def generate_csv_from_psql(fname: str):
- return subprocess.check_output(
- [
- "psql",
- "-d",
- pg_db,
- "-h",
- pg_host,
- "-p",
- pg_port,
- "-U",
- pg_user,
- "-X",
- "--csv",
- "-f",
- fname,
- ]
- )
+ cmd = ["psql"]
+
+ if pg_db is not None:
+ cmd.extend(["-d", pg_db])
+
+ if pg_user is not None:
+ cmd.extend(["-U", pg_user])
+
+ if pg_host is not None:
+ cmd.extend(["-h", pg_host])
+
+ if pg_port is not None:
+ cmd.extend(["-p", pg_port])
+
+ cmd.extend([
+ "-X",
+ "--csv",
+ "-f",
+ fname,
+ ])
+
+ return subprocess.check_output(cmd)
root = Path(os.path.dirname(__file__)) / "sqls"
test_files = set(root.glob("*.sql"))