You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/14 20:31:20 UTC
[arrow-datafusion] branch main updated: minor: use sql to setup test data for aggregate.slt rather than rust (#6664)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new c3d5d77e44 minor: use sql to setup test data for aggregate.slt rather than rust (#6664)
c3d5d77e44 is described below
commit c3d5d77e447e51c2cca814a67706e5ab3e050ced
Author: zhenxing jiang <ji...@gmail.com>
AuthorDate: Thu Jun 15 04:31:13 2023 +0800
minor: use sql to setup test data for aggregate.slt rather than rust (#6664)
* port register_aggregate_test_100 to sql
* port register_decimal_table to sql
* port register_median_test_tables to sql
* port register_test_data to sql
---
datafusion/core/tests/sqllogictests/src/main.rs | 5 -
datafusion/core/tests/sqllogictests/src/setup.rs | 137 +--------------------
datafusion/core/tests/sqllogictests/src/utils.rs | 40 ------
.../tests/sqllogictests/test_files/aggregate.slt | 78 +++++++++---
4 files changed, 66 insertions(+), 194 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
index adb3be9a6e..f71e98b54b 100644
--- a/datafusion/core/tests/sqllogictests/src/main.rs
+++ b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -33,7 +33,6 @@ use crate::engines::postgres::Postgres;
mod engines;
mod setup;
-mod utils;
const TEST_DIRECTORY: &str = "tests/sqllogictests/test_files/";
const PG_COMPAT_FILE_PREFIX: &str = "pg_compat_";
@@ -263,10 +262,6 @@ async fn context_for_test_file(relative_path: &Path) -> Option<TestContext> {
let file_name = relative_path.file_name().unwrap().to_str().unwrap();
match file_name {
- "aggregate.slt" => {
- info!("Registering aggregate tables");
- setup::register_aggregate_tables(test_ctx.session_ctx()).await;
- }
"scalar.slt" => {
info!("Registering scalar tables");
setup::register_scalar_tables(test_ctx.session_ctx()).await;
diff --git a/datafusion/core/tests/sqllogictests/src/setup.rs b/datafusion/core/tests/sqllogictests/src/setup.rs
index 26f9d2501a..34365f509a 100644
--- a/datafusion/core/tests/sqllogictests/src/setup.rs
+++ b/datafusion/core/tests/sqllogictests/src/setup.rs
@@ -17,22 +17,17 @@
use datafusion::{
arrow::{
- array::{
- ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
- Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
- },
+ array::Float64Array,
datatypes::{DataType, Field, Schema},
record_batch::RecordBatch,
},
- datasource::MemTable,
prelude::{CsvReadOptions, SessionContext},
- test_util,
};
use std::fs::File;
use std::io::Write;
use std::sync::Arc;
-use crate::{utils, TestContext};
+use crate::TestContext;
#[cfg(feature = "avro")]
pub async fn register_avro_tables(ctx: &mut crate::TestContext) {
@@ -66,134 +61,6 @@ pub async fn register_avro_tables(ctx: &mut crate::TestContext) {
.unwrap();
}
-pub async fn register_aggregate_tables(ctx: &SessionContext) {
- register_aggregate_test_100(ctx).await;
- register_decimal_table(ctx);
- register_median_test_tables(ctx);
- register_test_data(ctx);
-}
-
-fn register_median_test_tables(ctx: &SessionContext) {
- // Register median tables
- let items: Vec<(&str, DataType, ArrayRef)> = vec![
- (
- "i8",
- DataType::Int8,
- Arc::new(Int8Array::from(vec![i8::MIN, i8::MIN, 100, i8::MAX])),
- ),
- (
- "i16",
- DataType::Int16,
- Arc::new(Int16Array::from(vec![i16::MIN, i16::MIN, 100, i16::MAX])),
- ),
- (
- "i32",
- DataType::Int32,
- Arc::new(Int32Array::from(vec![i32::MIN, i32::MIN, 100, i32::MAX])),
- ),
- (
- "i64",
- DataType::Int64,
- Arc::new(Int64Array::from(vec![i64::MIN, i64::MIN, 100, i64::MAX])),
- ),
- (
- "u8",
- DataType::UInt8,
- Arc::new(UInt8Array::from(vec![u8::MIN, u8::MIN, 100, u8::MAX])),
- ),
- (
- "u16",
- DataType::UInt16,
- Arc::new(UInt16Array::from(vec![u16::MIN, u16::MIN, 100, u16::MAX])),
- ),
- (
- "u32",
- DataType::UInt32,
- Arc::new(UInt32Array::from(vec![u32::MIN, u32::MIN, 100, u32::MAX])),
- ),
- (
- "u64",
- DataType::UInt64,
- Arc::new(UInt64Array::from(vec![u64::MIN, u64::MIN, 100, u64::MAX])),
- ),
- (
- "f32",
- DataType::Float32,
- Arc::new(Float32Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
- ),
- (
- "f64",
- DataType::Float64,
- Arc::new(Float64Array::from(vec![1.1, 4.4, 5.5, 3.3, 2.2])),
- ),
- (
- "f64_nan",
- DataType::Float64,
- Arc::new(Float64Array::from(vec![1.1, f64::NAN, f64::NAN, f64::NAN])),
- ),
- ];
-
- for (name, data_type, values) in items {
- let batch = RecordBatch::try_new(
- Arc::new(Schema::new(vec![Field::new("a", data_type, false)])),
- vec![values],
- )
- .unwrap();
- let table_name = &format!("median_{name}");
- ctx.register_batch(table_name, batch).unwrap();
- }
-}
-
-fn register_test_data(ctx: &SessionContext) {
- let schema = Arc::new(Schema::new(vec![
- Field::new("c1", DataType::Int64, true),
- Field::new("c2", DataType::Int64, true),
- ]));
-
- let data = RecordBatch::try_new(
- schema,
- vec![
- Arc::new(Int64Array::from(vec![
- Some(0),
- Some(1),
- None,
- Some(3),
- Some(3),
- ])),
- Arc::new(Int64Array::from(vec![
- None,
- Some(1),
- Some(1),
- Some(2),
- Some(2),
- ])),
- ],
- )
- .unwrap();
-
- ctx.register_batch("test", data).unwrap();
-}
-
-fn register_decimal_table(ctx: &SessionContext) {
- let batch_decimal = utils::make_decimal();
- let schema = batch_decimal.schema();
- let partitions = vec![vec![batch_decimal]];
- let provider = Arc::new(MemTable::try_new(schema, partitions).unwrap());
- ctx.register_table("d_table", provider).unwrap();
-}
-
-async fn register_aggregate_test_100(ctx: &SessionContext) {
- let test_data = datafusion::test_util::arrow_test_data();
- let schema = test_util::aggr_test_schema();
- ctx.register_csv(
- "aggregate_test_100",
- &format!("{test_data}/csv/aggregate_test_100.csv"),
- CsvReadOptions::new().schema(&schema),
- )
- .await
- .unwrap();
-}
-
pub async fn register_scalar_tables(ctx: &SessionContext) {
register_nan_table(ctx)
}
diff --git a/datafusion/core/tests/sqllogictests/src/utils.rs b/datafusion/core/tests/sqllogictests/src/utils.rs
deleted file mode 100644
index 4d064a76e2..0000000000
--- a/datafusion/core/tests/sqllogictests/src/utils.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use datafusion::arrow::{
- array::{Array, Decimal128Builder},
- datatypes::{Field, Schema},
- record_batch::RecordBatch,
-};
-use std::sync::Arc;
-
-// TODO: move this to datafusion::test_utils?
-pub fn make_decimal() -> RecordBatch {
- let mut decimal_builder = Decimal128Builder::with_capacity(20);
- for i in 110000..110010 {
- decimal_builder.append_value(i as i128);
- }
- for i in 100000..100010 {
- decimal_builder.append_value(-i as i128);
- }
- let array = decimal_builder
- .finish()
- .with_precision_and_scale(10, 3)
- .unwrap();
- let schema = Schema::new(vec![Field::new("c1", array.data_type().clone(), true)]);
- RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap()
-}
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index ca1f8035f6..bf1ab2cbd1 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -15,6 +15,56 @@
# specific language governing permissions and limitations
# under the License.
+#######
+# Setup test data table
+#######
+statement ok
+CREATE EXTERNAL TABLE aggregate_test_100 (
+ c1 VARCHAR NOT NULL,
+ c2 TINYINT NOT NULL,
+ c3 SMALLINT NOT NULL,
+ c4 SMALLINT,
+ c5 INT,
+ c6 BIGINT NOT NULL,
+ c7 SMALLINT NOT NULL,
+ c8 INT NOT NULL,
+ c9 INT UNSIGNED NOT NULL,
+ c10 BIGINT UNSIGNED NOT NULL,
+ c11 FLOAT NOT NULL,
+ c12 DOUBLE NOT NULL,
+ c13 VARCHAR NOT NULL
+)
+STORED AS CSV
+WITH HEADER ROW
+LOCATION '../../testing/data/csv/aggregate_test_100.csv'
+
+statement ok
+CREATE TABLE d_table (c1 decimal(10,3)) as values
+(110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009),
+(-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009)
+
+statement ok
+CREATE TABLE median_table (
+ col_i8 TINYINT,
+ col_i16 SMALLINT,
+ col_i32 INT,
+ col_i64 BIGINT,
+ col_u8 TINYINT UNSIGNED,
+ col_u16 SMALLINT UNSIGNED,
+ col_u32 INT UNSIGNED,
+ col_u64 BIGINT UNSIGNED,
+ col_f32 FLOAT,
+ col_f64 DOUBLE,
+ col_f64_nan DOUBLE
+) as VALUES
+( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 1.1, 1.1, 1.1 ),
+( -128, -32768, -2147483648, arrow_cast(-9223372036854775808,'Int64'), 0, 0, 0, arrow_cast(0,'UInt64'), 4.4, 4.4, arrow_cast('NAN','Float64') ),
+( 100, 100, 100, arrow_cast(100,'Int64'), 100,100,100, arrow_cast(100,'UInt64'), 3.3, 3.3, arrow_cast('NAN','Float64') ),
+( 127, 32767, 2147483647, arrow_cast(9223372036854775807,'Int64'), 255, 65535, 4294967295, 18446744073709551615, 2.2, 2.2, arrow_cast('NAN','Float64') )
+
+statement ok
+CREATE TABLE test (c1 BIGINT,c2 BIGINT) as values
+(0,null), (1,1), (null,1), (3,2), (3,2)
#######
# Error tests
@@ -307,73 +357,73 @@ SELECT median(c12) FROM aggregate_test_100
# median_i8
query I
-SELECT median(a) FROM median_i8
+SELECT median(col_i8) FROM median_table
----
-14
# median_i16
query I
-SELECT median(a) FROM median_i16
+SELECT median(col_i16) FROM median_table
----
-16334
# median_i32
query I
-SELECT median(a) FROM median_i32
+SELECT median(col_i32) FROM median_table
----
-1073741774
# median_i64
query I
-SELECT median(a) FROM median_i64
+SELECT median(col_i64) FROM median_table
----
-4611686018427387854
# median_u8
query I
-SELECT median(a) FROM median_u8
+SELECT median(col_u8) FROM median_table
----
50
# median_u16
query I
-SELECT median(a) FROM median_u16
+SELECT median(col_u16) FROM median_table
----
50
# median_u32
query I
-SELECT median(a) FROM median_u32
+SELECT median(col_u32) FROM median_table
----
50
# median_u64
query I
-SELECT median(a) FROM median_u64
+SELECT median(col_u64) FROM median_table
----
50
# median_f32
query R
-SELECT median(a) FROM median_f32
+SELECT median(col_f32) FROM median_table
----
-3.3
+2.75
# median_f64
query R
-SELECT median(a) FROM median_f64
+SELECT median(col_f64) FROM median_table
----
-3.3
+2.75
# median_f64_nan
query R
-SELECT median(a) FROM median_f64_nan
+SELECT median(col_f64_nan) FROM median_table
----
NaN
# approx_median_f64_nan
query R
-SELECT approx_median(a) FROM median_f64_nan
+SELECT approx_median(col_f64_nan) FROM median_table
----
NaN