You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/07/06 19:09:30 UTC

[arrow-datafusion] branch master updated: MINOR: Add documentation for running integration tests (#2839)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 06450e8c7 MINOR: Add documentation for running integration tests (#2839)
06450e8c7 is described below

commit 06450e8c71f983bf4561703b0aaeb57b96b7ea93
Author: Andy Grove <ag...@apache.org>
AuthorDate: Wed Jul 6 12:09:24 2022 -0700

    MINOR: Add documentation for running integration tests (#2839)
    
    * make env vars optional
    
    * Document how to run integration tests
    
    * fix path
    
    * fix
    
    * asf header
    
    * fix doc text
---
 .github/workflows/rust.yml                       | 16 +-------
 integration-tests/README.md                      | 49 ++++++++++++++++++++++++
 integration-tests/create_test_table_postgres.sql | 15 ++++++++
 integration-tests/test_psql_parity.py            | 39 +++++++++++--------
 4 files changed, 87 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 216123132..b534ed224 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -170,21 +170,7 @@ jobs:
           # make sure psql can access the server
           echo "$POSTGRES_HOST:$POSTGRES_PORT:$POSTGRES_DB:$POSTGRES_USER:$POSTGRES_PASSWORD" | tee ~/.pgpass
           chmod 0600 ~/.pgpass
-          psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c 'CREATE TABLE IF NOT EXISTS test (
-            c1 character varying NOT NULL,
-            c2 integer NOT NULL,
-            c3 smallint NOT NULL,
-            c4 smallint NOT NULL,
-            c5 integer NOT NULL,
-            c6 bigint NOT NULL,
-            c7 smallint NOT NULL,
-            c8 integer NOT NULL,
-            c9 bigint NOT NULL,
-            c10 character varying NOT NULL,
-            c11 double precision NOT NULL,
-            c12 double precision NOT NULL,
-            c13 character varying NOT NULL
-          );'
+          psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -f integration-tests/create_test_table_postgres.sql
           psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c "\copy test FROM '$(pwd)/testing/data/csv/aggregate_test_100.csv' WITH (FORMAT csv, HEADER true);"
         env:
           POSTGRES_HOST: localhost
diff --git a/integration-tests/README.md b/integration-tests/README.md
new file mode 100644
index 000000000..b97c9ac61
--- /dev/null
+++ b/integration-tests/README.md
@@ -0,0 +1,49 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# DataFusion Integration Tests
+
+These tests run SQL queries against both DataFusion and Postgres and compare the results for parity.
+
+## Setup
+
+Set the following environment variables as appropriate for your environment. They are all optional.
+
+- `POSTGRES_DB`
+- `POSTGRES_USER`
+- `POSTGRES_HOST`
+- `POSTGRES_PORT`
+
+Create a Postgres database and then create the test table by running this script from the `integration-tests` directory:
+
+```bash
+psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
+  -f create_test_table_postgres.sql
+```
+
+Populate the table by running this command from the root of the repository (the path is resolved relative to `$(pwd)`):
+
+```bash
+psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
+  -c "\copy test FROM '$(pwd)/testing/data/csv/aggregate_test_100.csv' WITH (FORMAT csv, HEADER true);"
+```
+
+## Run Tests
+
+Run `pytest` from the root of the repository.
diff --git a/integration-tests/create_test_table_postgres.sql b/integration-tests/create_test_table_postgres.sql
new file mode 100644
index 000000000..7cc154b1f
--- /dev/null
+++ b/integration-tests/create_test_table_postgres.sql
@@ -0,0 +1,15 @@
+CREATE TABLE IF NOT EXISTS test (
+c1 character varying NOT NULL,
+c2 integer NOT NULL,
+c3 smallint NOT NULL,
+c4 smallint NOT NULL,
+c5 integer NOT NULL,
+c6 bigint NOT NULL,
+c7 smallint NOT NULL,
+c8 integer NOT NULL,
+c9 bigint NOT NULL,
+c10 character varying NOT NULL,
+c11 double precision NOT NULL,
+c12 double precision NOT NULL,
+c13 character varying NOT NULL
+);
\ No newline at end of file
diff --git a/integration-tests/test_psql_parity.py b/integration-tests/test_psql_parity.py
index f1c6bf6a5..506100bbc 100644
--- a/integration-tests/test_psql_parity.py
+++ b/integration-tests/test_psql_parity.py
@@ -52,24 +52,29 @@ def generate_csv_from_datafusion(fname: str):
 
 
 def generate_csv_from_psql(fname: str):
-    return subprocess.check_output(
-        [
-            "psql",
-            "-d",
-            pg_db,
-            "-h",
-            pg_host,
-            "-p",
-            pg_port,
-            "-U",
-            pg_user,
-            "-X",
-            "--csv",
-            "-f",
-            fname,
-        ]
-    )
 
+    cmd = ["psql"]
+
+    if pg_db is not None:
+        cmd.extend(["-d", pg_db])
+
+    if pg_user is not None:
+        cmd.extend(["-U", pg_user])
+
+    if pg_host is not None:
+        cmd.extend(["-h", pg_host])
+
+    if pg_port is not None:
+        cmd.extend(["-p", pg_port])
+
+    cmd.extend([
+        "-X",
+        "--csv",
+        "-f",
+        fname,
+    ])
+
+    return subprocess.check_output(cmd)
 
 root = Path(os.path.dirname(__file__)) / "sqls"
 test_files = set(root.glob("*.sql"))