You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/07/30 02:18:06 UTC
[arrow-datafusion] branch master updated: test: add file/SQL level test for pruning parquet row group with decimal data type. (#2977)
This is an automated email from the ASF dual-hosted git repository.
liukun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 3d1de1557 test: add file/SQL level test for pruning parquet row group with decimal data type. (#2977)
3d1de1557 is described below
commit 3d1de1557143efa7514e73839f5c54a6808d388c
Author: Kun Liu <li...@apache.org>
AuthorDate: Sat Jul 30 10:18:01 2022 +0800
test: add file/SQL level test for pruning parquet row group with decimal data type. (#2977)
---
datafusion/core/tests/parquet_pruning.rs | 186 +++++++++++++++++++++++++++++++
1 file changed, 186 insertions(+)
diff --git a/datafusion/core/tests/parquet_pruning.rs b/datafusion/core/tests/parquet_pruning.rs
index b4bf6a0c0..df7be0fcf 100644
--- a/datafusion/core/tests/parquet_pruning.rs
+++ b/datafusion/core/tests/parquet_pruning.rs
@@ -19,6 +19,7 @@
// data into a parquet file and then
use std::sync::Arc;
+use arrow::array::Decimal128Array;
use arrow::{
array::{
Array, ArrayRef, Date32Array, Date64Array, Float64Array, Int32Array, StringArray,
@@ -449,6 +450,154 @@ async fn prune_int32_eq_in_list_negated() {
assert_eq!(output.result_rows, 19, "{}", output.description());
}
+async fn test_prune_decimal(
+ decimal_case_type: Scenario,
+ sql: &str,
+ expected_errors: Option<usize>,
+ expected_row_group_pruned: Option<usize>,
+ expected_results: usize,
+) {
+ let output = ContextWithParquet::new(decimal_case_type)
+ .await
+ .query(sql)
+ .await;
+
+ println!("{}", output.description());
+ assert_eq!(output.predicate_evaluation_errors(), expected_errors);
+ assert_eq!(output.row_groups_pruned(), expected_row_group_pruned);
+ assert_eq!(
+ output.result_rows,
+ expected_results,
+ "{}",
+ output.description()
+ );
+}
+
+#[tokio::test]
+async fn prune_decimal_lt() {
+ // The data type of decimal_col is decimal(9,2)
+ // There are three row groups:
+ // [1.00, 6.00], [-5.00,6.00], [20.00,60.00]
+ test_prune_decimal(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col < 4",
+ Some(0),
+ Some(1),
+ 6,
+ )
+ .await;
+ // compare with the casted decimal value
+ test_prune_decimal(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))",
+ Some(0),
+ Some(1),
+ 8,
+ )
+ .await;
+
+ // The data type of decimal_col is decimal(38,2)
+ test_prune_decimal(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col < 4",
+ Some(0),
+ Some(1),
+ 6,
+ )
+ .await;
+ // compare with the casted decimal value
+ test_prune_decimal(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))",
+ Some(0),
+ Some(1),
+ 8,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_decimal_eq() {
+ // The data type of decimal_col is decimal(9,2)
+ // There are three row groups:
+ // [1.00, 6.00], [-5.00,6.00], [20.00,60.00]
+ test_prune_decimal(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col = 4",
+ Some(0),
+ Some(1),
+ 2,
+ )
+ .await;
+ test_prune_decimal(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col = 4.00",
+ Some(0),
+ Some(1),
+ 2,
+ )
+ .await;
+
+ // The data type of decimal_col is decimal(38,2)
+ test_prune_decimal(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col = 4",
+ Some(0),
+ Some(1),
+ 2,
+ )
+ .await;
+ test_prune_decimal(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col = 4.00",
+ Some(0),
+ Some(1),
+ 2,
+ )
+ .await;
+}
+
+#[tokio::test]
+async fn prune_decimal_in_list() {
+ // The data type of decimal_col is decimal(9,2)
+ // There are three row groups:
+ // [1.00, 6.00], [-5.00,6.00], [20.00,60.00]
+ test_prune_decimal(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col in (4,3,2,123456789123)",
+ Some(0),
+ Some(1),
+ 5,
+ )
+ .await;
+ test_prune_decimal(
+ Scenario::Decimal,
+ "SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)",
+ Some(0),
+ Some(1),
+ 6,
+ )
+ .await;
+
+ // The data type of decimal_col is decimal(38,2)
+ test_prune_decimal(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col in (4,3,2,123456789123)",
+ Some(0),
+ Some(1),
+ 5,
+ )
+ .await;
+ test_prune_decimal(
+ Scenario::DecimalLargePrecision,
+ "SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)",
+ Some(0),
+ Some(1),
+ 6,
+ )
+ .await;
+}
+
// ----------------------
// Begin test fixture
// ----------------------
@@ -459,6 +608,8 @@ enum Scenario {
Dates,
Int32,
Float64,
+ Decimal,
+ DecimalLargePrecision,
}
/// Test fixture that has an execution context that has an external
@@ -681,6 +832,23 @@ async fn make_test_file(scenario: Scenario) -> NamedTempFile {
make_f64_batch(vec![5.0, 6.0, 7.0, 8.0, 9.0]),
]
}
+ Scenario::Decimal => {
+ // decimal record batch
+ vec![
+ make_decimal_batch(vec![100, 200, 300, 400, 600], 9, 2),
+ make_decimal_batch(vec![-500, 100, 300, 400, 600], 9, 2),
+ make_decimal_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2),
+ ]
+ }
+ Scenario::DecimalLargePrecision => {
+ // decimal record batch with large precision,
+ // and the data will be stored as FIXED_LEN_BYTE_ARRAY
+ vec![
+ make_decimal_batch(vec![100, 200, 300, 400, 600], 38, 2),
+ make_decimal_batch(vec![-500, 100, 300, 400, 600], 38, 2),
+ make_decimal_batch(vec![2000, 3000, 3000, 4000, 6000], 38, 2),
+ ]
+ }
};
let schema = batches[0].schema();
@@ -799,6 +967,24 @@ fn make_f64_batch(v: Vec<f64>) -> RecordBatch {
RecordBatch::try_new(schema, vec![array.clone()]).unwrap()
}
+/// Return record batch with decimal vector
+///
+/// Columns are named
+/// "decimal_col" -> DecimalArray
+fn make_decimal_batch(v: Vec<i128>, precision: usize, scale: usize) -> RecordBatch {
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "decimal_col",
+ DataType::Decimal(precision, scale),
+ true,
+ )]));
+ let array = Arc::new(
+ Decimal128Array::from_iter_values(v)
+ .with_precision_and_scale(precision, scale)
+ .unwrap(),
+ ) as ArrayRef;
+ RecordBatch::try_new(schema, vec![array.clone()]).unwrap()
+}
+
/// Return record batch with a few rows of data for all of the supported date
/// types with the specified offset (in days)
///