You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2024/01/23 11:26:39 UTC
(arrow-datafusion) branch main updated: fix issue #8922 make row group test more readable (#8941)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 31b9b48b08 fix issue #8922 make row group test more readable (#8941)
31b9b48b08 is described below

commit 31b9b48b08592b7d293f46e75707aad7dadd7cbc
Author: Lordworms <48...@users.noreply.github.com>
AuthorDate: Tue Jan 23 05:26:33 2024 -0600

    fix issue #8922 make row group test more readable (#8941)
---
 datafusion/core/tests/parquet/row_group_pruning.rs | 842 +++++++++++----------
 1 file changed, 431 insertions(+), 411 deletions(-)

diff --git a/datafusion/core/tests/parquet/row_group_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs
index fc1b66efed..c8cac5dd9b 100644
--- a/datafusion/core/tests/parquet/row_group_pruning.rs
+++ b/datafusion/core/tests/parquet/row_group_pruning.rs
@@ -25,138 +25,164 @@ use itertools::Itertools;
 use crate::parquet::Unit::RowGroup;
 use crate::parquet::{ContextWithParquet, Scenario};
 use datafusion_expr::{col, lit};
-
-async fn test_row_group_prune(
-    case_data_type: Scenario,
-    sql: &str,
+struct RowGroupPruningTest {
+    scenario: Scenario,
+    query: String,
     expected_errors: Option<usize>,
     expected_row_group_pruned_by_statistics: Option<usize>,
     expected_row_group_pruned_by_bloom_filter: Option<usize>,
     expected_results: usize,
-) {
-    let output = ContextWithParquet::new(case_data_type, RowGroup)
-        .await
-        .query(sql)
-        .await;
-
-    println!("{}", output.description());
-    assert_eq!(output.predicate_evaluation_errors(), expected_errors);
-    assert_eq!(
-        output.row_groups_pruned_statistics(),
-        expected_row_group_pruned_by_statistics
-    );
-    assert_eq!(
-        output.row_groups_pruned_bloom_filter(),
-        expected_row_group_pruned_by_bloom_filter
-    );
-    assert_eq!(
-        output.result_rows,
-        expected_results,
-        "{}",
-        output.description()
-    );
 }
-
-/// check row group pruning by bloom filter and statistics independently
-async fn test_prune_verbose(
-    case_data_type: Scenario,
-    sql: &str,
-    expected_errors: Option<usize>,
-    expected_row_group_pruned_sbbf: Option<usize>,
-    expected_row_group_pruned_statistics: Option<usize>,
-    expected_results: usize,
-) {
-    let output = ContextWithParquet::new(case_data_type, RowGroup)
-        .await
-        .query(sql)
-        .await;
-
-    println!("{}", output.description());
-    assert_eq!(output.predicate_evaluation_errors(), expected_errors);
-    assert_eq!(
-        output.row_groups_pruned_bloom_filter(),
-        expected_row_group_pruned_sbbf
-    );
-    assert_eq!(
-        output.row_groups_pruned_statistics(),
-        expected_row_group_pruned_statistics
-    );
-    assert_eq!(
-        output.result_rows,
-        expected_results,
-        "{}",
-        output.description()
-    );
+impl RowGroupPruningTest {
+    // Start building the test configuration
+    fn new() -> Self {
+        Self {
+            scenario: Scenario::Timestamps, // or another default
+            query: String::new(),
+            expected_errors: None,
+            expected_row_group_pruned_by_statistics: None,
+            expected_row_group_pruned_by_bloom_filter: None,
+            expected_results: 0,
+        }
+    }
+
+    // Set the scenario for the test
+    fn with_scenario(mut self, scenario: Scenario) -> Self {
+        self.scenario = scenario;
+        self
+    }
+
+    // Set the SQL query for the test
+    fn with_query(mut self, query: &str) -> Self {
+        self.query = query.to_string();
+        self
+    }
+
+    // Set the expected errors for the test
+    fn with_expected_errors(mut self, errors: Option<usize>) -> Self {
+        self.expected_errors = errors;
+        self
+    }
+
+    // Set the expected pruned row groups by statistics
+    fn with_pruned_by_stats(mut self, pruned_by_stats: Option<usize>) -> Self {
+        self.expected_row_group_pruned_by_statistics = pruned_by_stats;
+        self
+    }
+
+    // Set the expected pruned row groups by bloom filter
+    fn with_pruned_by_bloom_filter(mut self, pruned_by_bf: Option<usize>) -> Self {
+        self.expected_row_group_pruned_by_bloom_filter = pruned_by_bf;
+        self
+    }
+
+    // Set the expected rows for the test
+    fn with_expected_rows(mut self, rows: usize) -> Self {
+        self.expected_results = rows;
+        self
+    }
+
+    // Execute the test with the current configuration
+    async fn test_row_group_prune(self) {
+        let output = ContextWithParquet::new(self.scenario, RowGroup)
+            .await
+            .query(&self.query)
+            .await;
+
+        println!("{}", output.description());
+        assert_eq!(output.predicate_evaluation_errors(), self.expected_errors);
+        assert_eq!(
+            output.row_groups_pruned_statistics(),
+            self.expected_row_group_pruned_by_statistics
+        );
+        assert_eq!(
+            output.row_groups_pruned_bloom_filter(),
+            self.expected_row_group_pruned_by_bloom_filter
+        );
+        assert_eq!(
+            output.result_rows,
+            self.expected_results,
+            "{}",
+            output.description()
+        );
+    }
 }
 
 #[tokio::test]
 async fn prune_timestamps_nanos() {
-    test_row_group_prune(
-        Scenario::Timestamps,
-        "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')",
-        Some(0),
-        Some(1),
-        Some(0),
-        10,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Timestamps)
+        .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(10)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_timestamps_micros() {
-    test_row_group_prune(
-        Scenario::Timestamps,
-        "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 01:01:11Z')",
-        Some(0),
-        Some(1),
-        Some(0),
-        10,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Timestamps)
+        .with_query(
+            "SELECT * FROM t where micros < to_timestamp_micros('2020-01-02 01:01:11Z')",
+        )
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(10)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_timestamps_millis() {
-    test_row_group_prune(
-        Scenario::Timestamps,
-        "SELECT * FROM t where millis < to_timestamp_millis('2020-01-02 01:01:11Z')",
-        Some(0),
-        Some(1),
-        Some(0),
-        10,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Timestamps)
+        .with_query(
+            "SELECT * FROM t where millis < to_timestamp_millis('2020-01-02 01:01:11Z')",
+        )
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(10)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_timestamps_seconds() {
-    test_row_group_prune(
-        Scenario::Timestamps,
-        "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02 01:01:11Z')",
-        Some(0),
-        Some(1),
-        Some(0),
-        10,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Timestamps)
+        .with_query(
+            "SELECT * FROM t where seconds < to_timestamp_seconds('2020-01-02 01:01:11Z')",
+        )
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(10)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_date32() {
-    test_row_group_prune(
-        Scenario::Dates,
-        "SELECT * FROM t where date32 < cast('2020-01-02' as date)",
-        Some(0),
-        Some(3),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Dates)
+        .with_query("SELECT * FROM t where date32 < cast('2020-01-02' as date)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(3))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_date64() {
     // work around for not being able to cast Date32 to Date64 automatically
+
     let date = "2020-01-02"
         .parse::<chrono::NaiveDate>()
         .unwrap()
@@ -181,15 +207,15 @@ async fn prune_date64() {
 
 #[tokio::test]
 async fn prune_disabled() {
-    test_row_group_prune(
-        Scenario::Timestamps,
-        "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')",
-        Some(0),
-        Some(1),
-        Some(0),
-        10,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Timestamps)
+        .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(10)
+        .test_row_group_prune()
+        .await;
 
     // test without pruning
     let query = "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')";
@@ -215,232 +241,233 @@ async fn prune_disabled() {
 
 #[tokio::test]
 async fn prune_int32_lt() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i < 1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i < 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
+
     // result of sql "SELECT * FROM t where i < 1" is same as
     // "SELECT * FROM t where -i > -1"
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where -i > -1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where -i > -1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i = 1",
-        Some(0),
-        Some(3),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(3))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 #[tokio::test]
 async fn prune_int32_scalar_fun_and_eq() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where abs(i) = 1  and i = 1",
-        Some(0),
-        Some(3),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where abs(i) = 1  and i = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(3))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_scalar_fun() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where abs(i) = 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        3,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where abs(i) = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(3)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_complex_expr() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i+1 = 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        2,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i+1 = 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(2)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_complex_expr_subtract() {
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where 1-i > 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        9,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where 1-i > 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(9)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_lt() {
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where f < 1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where -f > -1",
-        Some(0),
-        Some(1),
-        Some(0),
-        11,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where f < 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where -f > -1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(11)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_scalar_fun_and_gt() {
     // result of sql "SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1"
     // only use "f >= 0" to prune
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1",
-        Some(0),
-        Some(2),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where abs(f - 1) <= 0.000001  and f >= 0.1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(2))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_scalar_fun() {
     // result of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not supported
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where abs(f-1) <= 0.000001",
-        Some(0),
-        Some(0),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where abs(f-1) <= 0.000001")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_complex_expr() {
     // result of sql "SELECT * FROM t where f+1 > 1.1"" is not supported
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where f+1 > 1.1",
-        Some(0),
-        Some(0),
-        Some(0),
-        9,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where f+1 > 1.1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(9)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_f64_complex_expr_subtract() {
     // result of sql "SELECT * FROM t where 1-f > 1" is not supported
-    test_row_group_prune(
-        Scenario::Float64,
-        "SELECT * FROM t where 1-f > 1",
-        Some(0),
-        Some(0),
-        Some(0),
-        9,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Float64)
+        .with_query("SELECT * FROM t where 1-f > 1")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(9)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq_in_list() {
     // result of sql "SELECT * FROM t where in (1)"
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i in (1)",
-        Some(0),
-        Some(3),
-        Some(0),
-        1,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i in (1)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(3))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(1)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq_in_list_2() {
     // result of sql "SELECT * FROM t where in (1000)", prune all
     // test whether statistics works
-    test_prune_verbose(
-        Scenario::Int32,
-        "SELECT * FROM t where i in (1000)",
-        Some(0),
-        Some(0),
-        Some(4),
-        0,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i in (1000)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(4))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(0)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq_large_in_list() {
     // result of sql "SELECT * FROM t where i in (2050...2582)", prune all
-    // test whether sbbf works
-    test_prune_verbose(
-        Scenario::Int32Range,
-        format!(
-            "SELECT * FROM t where i in ({})",
-            (200050..200082).join(",")
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32Range)
+        .with_query(
+            format!(
+                "SELECT * FROM t where i in ({})",
+                (200050..200082).join(",")
+            )
+            .as_str(),
         )
-        .as_str(),
-        Some(0),
-        Some(1),
-        // we don't support pruning by statistics for in_list with more than 20 elements currently
-        Some(0),
-        0,
-    )
-    .await;
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(1))
+        .with_expected_rows(0)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
 async fn prune_int32_eq_in_list_negated() {
     // result of sql "SELECT * FROM t where not in (1)" prune nothing
-    test_row_group_prune(
-        Scenario::Int32,
-        "SELECT * FROM t where i not in (1)",
-        Some(0),
-        Some(0),
-        Some(0),
-        19,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Int32)
+        .with_query("SELECT * FROM t where i not in (1)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(0))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(19)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
@@ -448,46 +475,42 @@ async fn prune_decimal_lt() {
     // The data type of decimal_col is decimal(9,2)
     // There are three row groups:
     // [1.00, 6.00], [-5.00,6.00], [20.00,60.00]
-    test_row_group_prune(
-        Scenario::Decimal,
-        "SELECT * FROM t where decimal_col < 4",
-        Some(0),
-        Some(1),
-        Some(0),
-        6,
-    )
-    .await;
-    // compare with the casted decimal value
-    test_row_group_prune(
-        Scenario::Decimal,
-        "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))",
-        Some(0),
-        Some(1),
-        Some(0),
-        8,
-    )
-    .await;
-
-    // The data type of decimal_col is decimal(38,2)
-    test_row_group_prune(
-        Scenario::DecimalLargePrecision,
-        "SELECT * FROM t where decimal_col < 4",
-        Some(0),
-        Some(1),
-        Some(0),
-        6,
-    )
-    .await;
-    // compare with the casted decimal value
-    test_row_group_prune(
-        Scenario::DecimalLargePrecision,
-        "SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))",
-        Some(0),
-        Some(1),
-        Some(0),
-        8,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Decimal)
+        .with_query("SELECT * FROM t where decimal_col < 4")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(6)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Decimal)
+        .with_query("SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(8)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::DecimalLargePrecision)
+        .with_query("SELECT * FROM t where decimal_col < 4")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(6)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::DecimalLargePrecision)
+        .with_query("SELECT * FROM t where decimal_col < cast(4.55 as decimal(20,2))")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(8)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
@@ -495,44 +518,44 @@ async fn prune_decimal_eq() {
     // The data type of decimal_col is decimal(9,2)
     // There are three row groups:
     // [1.00, 6.00], [-5.00,6.00], [20.00,60.00]
-    test_row_group_prune(
-        Scenario::Decimal,
-        "SELECT * FROM t where decimal_col = 4",
-        Some(0),
-        Some(1),
-        Some(0),
-        2,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::Decimal,
-        "SELECT * FROM t where decimal_col = 4.00",
-        Some(0),
-        Some(1),
-        Some(0),
-        2,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Decimal)
+        .with_query("SELECT * FROM t where decimal_col = 4")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(2)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Decimal)
+        .with_query("SELECT * FROM t where decimal_col = 4.00")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(2)
+        .test_row_group_prune()
+        .await;
 
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::DecimalLargePrecision)
+        .with_query("SELECT * FROM t where decimal_col = 4")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(2)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::DecimalLargePrecision)
+        .with_query("SELECT * FROM t where decimal_col = 4.00")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(2)
+        .test_row_group_prune()
+        .await;
     // The data type of decimal_col is decimal(38,2)
-    test_row_group_prune(
-        Scenario::DecimalLargePrecision,
-        "SELECT * FROM t where decimal_col = 4",
-        Some(0),
-        Some(1),
-        Some(0),
-        2,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::DecimalLargePrecision,
-        "SELECT * FROM t where decimal_col = 4.00",
-        Some(0),
-        Some(1),
-        Some(0),
-        2,
-    )
-    .await;
 }
 
 #[tokio::test]
@@ -540,44 +563,42 @@ async fn prune_decimal_in_list() {
     // The data type of decimal_col is decimal(9,2)
     // There are three row groups:
     // [1.00, 6.00], [-5.00,6.00], [20.00,60.00]
-    test_row_group_prune(
-        Scenario::Decimal,
-        "SELECT * FROM t where decimal_col in (4,3,2,123456789123)",
-        Some(0),
-        Some(1),
-        Some(0),
-        5,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::Decimal,
-        "SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)",
-        Some(0),
-        Some(1),
-        Some(0),
-        6,
-    )
-    .await;
-
-    // The data type of decimal_col is decimal(38,2)
-    test_row_group_prune(
-        Scenario::DecimalLargePrecision,
-        "SELECT * FROM t where decimal_col in (4,3,2,123456789123)",
-        Some(0),
-        Some(1),
-        Some(0),
-        5,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::DecimalLargePrecision,
-        "SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)",
-        Some(0),
-        Some(1),
-        Some(0),
-        6,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Decimal)
+        .with_query("SELECT * FROM t where decimal_col in (4,3,2,123456789123)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(5)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::Decimal)
+        .with_query("SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(6)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::DecimalLargePrecision)
+        .with_query("SELECT * FROM t where decimal_col in (4,3,2,123456789123)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(5)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::DecimalLargePrecision)
+        .with_query("SELECT * FROM t where decimal_col in (4.00,3.00,11.2345,1)")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(6)
+        .test_row_group_prune()
+        .await;
 }
 
 #[tokio::test]
@@ -586,32 +607,31 @@ async fn prune_periods_in_column_names() {
     // name = "HTTP GET / DISPATCH", service.name = ['frontend', 'frontend'],
     // name = "HTTP PUT / DISPATCH", service.name = ['backend',  'frontend'],
     // name = "HTTP GET / DISPATCH", service.name = ['backend',  'backend' ],
-    test_row_group_prune(
-        Scenario::PeriodsInColumnNames,
-        // use double quotes to use column named "service.name"
-        "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend'",
-        Some(0),
-        Some(1), // prune out last row group
-        Some(0),
-        7,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::PeriodsInColumnNames,
-        "SELECT \"name\", \"service.name\" FROM t WHERE \"name\" != 'HTTP GET / DISPATCH'",
-        Some(0),
-        Some(2), // prune out first and last row group
-        Some(0),
-        5,
-    )
-    .await;
-    test_row_group_prune(
-        Scenario::PeriodsInColumnNames,
-        "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend' AND \"name\" != 'HTTP GET / DISPATCH'",
-        Some(0),
-        Some(2), // prune out middle and last row group
-        Some(0),
-        2,
-    )
-    .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::PeriodsInColumnNames)
+        .with_query(  "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend'")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(1))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(7)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::PeriodsInColumnNames)
+        .with_query(  "SELECT \"name\", \"service.name\" FROM t WHERE \"name\" != 'HTTP GET / DISPATCH'")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(2))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(5)
+        .test_row_group_prune()
+        .await;
+    RowGroupPruningTest::new()
+        .with_scenario(Scenario::PeriodsInColumnNames)
+        .with_query(  "SELECT \"name\", \"service.name\" FROM t WHERE \"service.name\" = 'frontend' AND \"name\" != 'HTTP GET / DISPATCH'")
+        .with_expected_errors(Some(0))
+        .with_pruned_by_stats(Some(2))
+        .with_pruned_by_bloom_filter(Some(0))
+        .with_expected_rows(2)
+        .test_row_group_prune()
+        .await;
 }