You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/14 20:31:20 UTC

[arrow-datafusion] branch main updated: minor: use sql to setup test data for aggregate.slt rather than rust (#6664)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new c3d5d77e44 minor: use sql to setup test data for aggregate.slt rather than rust (#6664)
c3d5d77e44 is described below

commit c3d5d77e447e51c2cca814a67706e5ab3e050ced
Author: zhenxing jiang <ji...@gmail.com>
AuthorDate: Thu Jun 15 04:31:13 2023 +0800

    minor: use sql to setup test data for aggregate.slt rather than rust (#6664)
    
    * port register_aggregate_test_100 to sql
    
    * port register_decimal_table to sql
    
    * port register_median_test_tables to sql
    
    * port register_test_data to sql
---
 datafusion/core/tests/sqllogictests/src/main.rs    |   5 -
 datafusion/core/tests/sqllogictests/src/setup.rs   | 137 +--------------------
 datafusion/core/tests/sqllogictests/src/utils.rs   |  40 ------
 .../tests/sqllogictests/test_files/aggregate.slt   |  78 +++++++++---
 4 files changed, 66 insertions(+), 194 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
index adb3be9a6e..f71e98b54b 100644
--- a/datafusion/core/tests/sqllogictests/src/main.rs
+++ b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -33,7 +33,6 @@ use crate::engines::postgres::Postgres;
 
 mod engines;
 mod setup;
-mod utils;
 
 const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files/";
 const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_";
@@ -263,10 +262,6 @@ async fn context_for_test_file(relative_path: &Path) -> Option<TestContext> {
 
     let file_name = relative_path.file_name().unwrap().to_str().unwrap();
     match file_name {
-        "aggregate.slt" => {
-            info!("Registering aggregate tables");
-            setup::register_aggregate_tables(test_ctx.session_ctx()).await;
-        }
         "scalar.slt" => {
             info!("Registering scalar tables");
             setup::register_scalar_tables(test_ctx.session_ctx()).await;
diff --git a/datafusion/core/tests/sqllogictests/src/setup.rs b/datafusion/core/tests/sqllogictests/src/setup.rs
index 26f9d2501a..34365f509a 100644
--- a/datafusion/core/tests/sqllogictests/src/setup.rs
+++ b/datafusion/core/tests/sqllogictests/src/setup.rs
@@ -17,22 +17,17 @@
 
 use datafusion::{
     arrow::{
-        array::{
-            ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
-            Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
-        },
+        array::Float64Array,
         datatypes::{DataType, Field, Schema},
         record_batch::RecordBatch,
     },
-    datasource::MemTable,
     prelude::{CsvReadOptions, SessionContext},
-    test_util,
 };
 use std::fs::File;
 use std::io::Write;
 use std::sync::Arc;
 
-use crate::{utils, TestContext};
+use crate::TestContext;
 
 #[cfg(feature = "avro")]
 pub async fn register_avro_tables(ctx: &mut crate::TestContext) {
@@ -66,134 +61,6 @@ pub async fn register_avro_tables(ctx: &mut crate::TestContext) {
         .unwrap();
 }
 
-pub async fn register_aggregate_tables(ctx: &SessionContext) {
-    register_aggregate_test_100(ctx).await;
-    register_decimal_table(ctx);
-    register_median_test_tables(ctx);
-    register_test_data(ctx);
-}
-
-fn register_median_test_tables(ctx: &SessionContext) {
-    // Register median tables
-    let items: Vec<(&str, DataType, ArrayRef)> = vec![
-        (
-            "i8",
-            DataType::Int8,
-            Arc::new(Int8Array::from(vec![i8::MIN, i8::MIN, 100, i8::MAX])),
-        ),
-        (
-            "i16",
-            DataType::Int16,
-            Arc::new(Int16Array::from(vec![i16::MIN, i16::MIN, 100, i16::MAX])),
-        ),
-        (
-            "i32",
-            DataType::Int32,
-            Arc::new(Int32Array::from(vec![i32::MIN, i32::MIN, 100, i32::MAX])),
-        ),
-        (
-            "i64",
-            DataType::Int64,
-            Arc::new(Int64Array::from(vec![i64::MIN, i64::MIN, 100, i64::MAX])),
-        ),
-        (
-            "u8",
-            DataType::UInt8,
-            Arc::new(UInt8Array::from(vec![u8::MIN, u8::MIN, 100, u8::MAX])),
-        ),
-        (
-            "u16",
-            DataType::UInt16,
-            Arc::new(UInt16Array::from(vec![u16::MIN, u16::MIN, 100, u16::MAX])),
-        ),
-        (
-            "u32",
-            DataType::UInt32,
-            Arc::new(UInt32Array::from(vec![u32::MIN, u32::MIN, 100, u32::MAX])),
-        ),
-        (
-            "u64",
-            DataType::UInt64,
-            Arc::new(UInt64Array::from(vec![u64::MIN, u64::MIN, 100, u64::MAX])),
-        ),
-        (
-            "f32",
-            DataType::Float32,
-            Arc::new(Float32Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
-        ),
-        (
-            "f64",
-            DataType::Float64,
-            Arc::new(Float64Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
-        ),
-        (
-            "f64_nan",
-            DataType::Float64,
-            Arc::new(Float64Array::from(vec![1.1, f64::NAN, f64::NAN, f64::NAN])),
-        ),
-    ];
-
-    for (name, data_type, values) in items {
-        let batch = RecordBatch::try_new(
-            Arc::new(Schema::new(vec![Field::new("a", data_type, false)])),
-            vec![values],
-        )
-        .unwrap();
-        let table_name = &format!("median_{name}");
-        ctx.register_batch(table_name, batch).unwrap();
-    }
-}
-
-fn register_test_data(ctx: &SessionContext) {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("c1", DataType::Int64, true),
-        Field::new("c2", DataType::Int64, true),
-    ]));
-
-    let data = RecordBatch::try_new(
-        schema,
-        vec![
-            Arc::new(Int64Array::from(vec![
-                Some(0),
-                Some(1),
-                None,
-                Some(3),
-                Some(3),
-            ])),
-            Arc::new(Int64Array::from(vec![
-                None,
-                Some(1),
-                Some(1),
-                Some(2),
-                Some(2),
-            ])),
-        ],
-    )
-    .unwrap();
-
-    ctx.register_batch("test", data).unwrap();
-}
-
-fn register_decimal_table(ctx: &SessionContext) {
-    let batch_decimal = utils::make_decimal();
-    let schema = batch_decimal.schema();
-    let partitions = vec![vec![batch_decimal]];
-    let provider = Arc::new(MemTable::try_new(schema, partitions).unwrap());
-    ctx.register_table("d_table", provider).unwrap();
-}
-
-async fn register_aggregate_test_100(ctx: &SessionContext) {
-    let test_data = datafusion::test_util::arrow_test_data();
-    let schema = test_util::aggr_test_schema();
-    ctx.register_csv(
-        "aggregate_test_100",
-        &format!("{test_data}/csv/aggregate_test_100.csv"),
-        CsvReadOptions::new().schema(&schema),
-    )
-    .await
-    .unwrap();
-}
-
 pub async fn register_scalar_tables(ctx: &SessionContext) {
     register_nan_table(ctx)
 }
diff --git a/datafusion/core/tests/sqllogictests/src/utils.rs b/datafusion/core/tests/sqllogictests/src/utils.rs
deleted file mode 100644
index 4d064a76e2..0000000000
--- a/datafusion/core/tests/sqllogictests/src/utils.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use datafusion::arrow::{
-    array::{Array, Decimal128Builder},
-    datatypes::{Field, Schema},
-    record_batch::RecordBatch,
-};
-use std::sync::Arc;
-
-// TODO: move this to datafusion::test_utils?
-pub fn make_decimal() -> RecordBatch {
-    let mut decimal_builder = Decimal128Builder::with_capacity(20);
-    for i in 110000..110010 {
-        decimal_builder.append_value(i as i128);
-    }
-    for i in 100000..100010 {
-        decimal_builder.append_value(-i as i128);
-    }
-    let array = decimal_builder
-        .finish()
-        .with_precision_and_scale(10, 3)
-        .unwrap();
-    let schema = Schema::new(vec![Field::new("c1", array.data_type().clone(), true)]);
-    RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap()
-}
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index ca1f8035f6..bf1ab2cbd1 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -15,6 +15,56 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#######
+# Setup test data table
+#######
+statement ok
+CREATE EXTERNAL TABLE aggregate_test_100 (
+  c1  VARCHAR NOT NULL,
+  c2  TINYINT NOT NULL,
+  c3  SMALLINT NOT NULL,
+  c4  SMALLINT,
+  c5  INT,
+  c6  BIGINT NOT NULL,
+  c7  SMALLINT NOT NULL,
+  c8  INT NOT NULL,
+  c9  INT UNSIGNED NOT NULL,
+  c10 BIGINT UNSIGNED NOT NULL,
+  c11 FLOAT NOT NULL,
+  c12 DOUBLE NOT NULL,
+  c13 VARCHAR NOT NULL
+)
+STORED AS CSV
+WITH HEADER ROW
+LOCATION '../../testing/data/csv/aggregate_test_100.csv'
+
+statement ok
+CREATE TABLE d_table (c1 decimal(10,3)) as values
+(110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009),
+(-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009)
+
+statement ok
+CREATE TABLE median_table (
+    col_i8 TINYINT,
+    col_i16 SMALLINT,
+    col_i32 INT,
+    col_i64 BIGINT,
+    col_u8 TINYINT UNSIGNED,
+    col_u16 SMALLINT UNSIGNED,
+    col_u32 INT UNSIGNED,
+    col_u64 BIGINT UNSIGNED,
+    col_f32 FLOAT,
+    col_f64 DOUBLE,
+    col_f64_nan DOUBLE
+) as VALUES
+( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 1.1, 1.1, 1.1 ),
+( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 4.4, 4.4, arrow_cast('NAN','Float64') ),
+( 100,  100,    100,         arrow_cast(100,'Int64'),              100,100,100, arrow_cast(100,'UInt64'), 3.3, 3.3, arrow_cast('NAN','Float64') ),
+( 127, 32767, 2147483647, arrow_cast(9223372036854775807,'Int64'), 255, 65535,  4294967295, 18446744073709551615, 2.2, 2.2, arrow_cast('NAN','Float64') )
+
+statement ok
+CREATE TABLE test (c1 BIGINT,c2 BIGINT) as values
+(0,null), (1,1), (null,1), (3,2), (3,2)
 
 #######
 # Error tests
@@ -307,73 +357,73 @@ SELECT median(c12) FROM aggregate_test_100
 
 # median_i8
 query I
-SELECT median(a) FROM median_i8
+SELECT median(col_i8) FROM median_table
 ----
 -14
 
 # median_i16
 query I
-SELECT median(a) FROM median_i16
+SELECT median(col_i16) FROM median_table
 ----
 -16334
 
 # median_i32
 query I
-SELECT median(a) FROM median_i32
+SELECT median(col_i32) FROM median_table
 ----
 -1073741774
 
 # median_i64
 query I
-SELECT median(a) FROM median_i64
+SELECT median(col_i64) FROM median_table
 ----
 -4611686018427387854
 
 # median_u8
 query I
-SELECT median(a) FROM median_u8
+SELECT median(col_u8) FROM median_table
 ----
 50
 
 # median_u16
 query I
-SELECT median(a) FROM median_u16
+SELECT median(col_u16) FROM median_table
 ----
 50
 
 # median_u32
 query I
-SELECT median(a) FROM median_u32
+SELECT median(col_u32) FROM median_table
 ----
 50
 
 # median_u64
 query I
-SELECT median(a) FROM median_u64
+SELECT median(col_u64) FROM median_table
 ----
 50
 
 # median_f32
 query R
-SELECT median(a) FROM median_f32
+SELECT median(col_f32) FROM median_table
 ----
-3.3
+2.75
 
 # median_f64
 query R
-SELECT median(a) FROM median_f64
+SELECT median(col_f64) FROM median_table
 ----
-3.3
+2.75
 
 # median_f64_nan
 query R
-SELECT median(a) FROM median_f64_nan
+SELECT median(col_f64_nan) FROM median_table
 ----
 NaN
 
 # approx_median_f64_nan
 query R
-SELECT approx_median(a) FROM median_f64_nan
+SELECT approx_median(col_f64_nan) FROM median_table
 ----
 NaN