You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by nj...@apache.org on 2023/05/27 23:24:38 UTC

[arrow-datafusion] branch main updated: Continue PR 4757 (#6456)

This is an automated email from the ASF dual-hosted git repository.

nju_yaho pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new de2d1c5bac Continue PR 4757 (#6456)
de2d1c5bac is described below

commit de2d1c5bac1f1bd770394761a8c7bb58657315bb
Author: yahoNanJing <90...@users.noreply.github.com>
AuthorDate: Sun May 28 07:24:32 2023 +0800

    Continue PR 4757 (#6456)
    
    Co-authored-by: yangzhong <ya...@ebay.com>
---
 datafusion/execution/src/config.rs               | 24 ++++++++++++++++++++++++
 docs/source/user-guide/cli.md                    |  2 --
 docs/source/user-guide/sql/information_schema.md |  1 -
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/datafusion/execution/src/config.rs b/datafusion/execution/src/config.rs
index 2867e7bd7c..97770eb99c 100644
--- a/datafusion/execution/src/config.rs
+++ b/datafusion/execution/src/config.rs
@@ -250,6 +250,30 @@ impl SessionConfig {
         self
     }
 
+    /// Enables or disables coalescing of small batches into larger ones; returns `self`.
+    pub fn with_coalesce_batches(mut self, enabled: bool) -> Self {
+        self.options.execution.coalesce_batches = enabled;
+        self
+    }
+
+    /// Returns the `execution.coalesce_batches` option (see `with_coalesce_batches`):
+    /// true if small record batches will be coalesced into larger ones between operators.
+    pub fn coalesce_batches(&self) -> bool {
+        self.options.execution.coalesce_batches
+    }
+
+    /// Enables or disables round robin repartitioning to increase parallelism; returns `self`.
+    pub fn with_round_robin_repartition(mut self, enabled: bool) -> Self {
+        self.options.optimizer.enable_round_robin_repartition = enabled;
+        self
+    }
+
+    /// Returns the `optimizer.enable_round_robin_repartition` option: true if the physical
+    /// plan optimizer will try to add round robin repartitioning to use more CPU cores.
+    pub fn round_robin_repartition(&self) -> bool {
+        self.options.optimizer.enable_round_robin_repartition
+    }
+
     /// Convert configuration options to name-value pairs with values
     /// converted to strings.
     ///
diff --git a/docs/source/user-guide/cli.md b/docs/source/user-guide/cli.md
index ec0a917c27..53cceb8d0a 100644
--- a/docs/source/user-guide/cli.md
+++ b/docs/source/user-guide/cli.md
@@ -358,7 +358,6 @@ Available commands inside DataFusion CLI are:
 +-------------------------------------------------+---------+
 | datafusion.execution.batch_size                 | 8192    |
 | datafusion.execution.coalesce_batches           | true    |
-| datafusion.execution.coalesce_target_batch_size | 4096    |
 | datafusion.execution.time_zone                  | UTC     |
 | datafusion.explain.logical_plan_only            | false   |
 | datafusion.explain.physical_plan_only           | false   |
@@ -395,7 +394,6 @@ DataFusion CLI v12.0.0
 +-------------------------------------------------+---------+
 | datafusion.execution.batch_size                 | 1024    |
 | datafusion.execution.coalesce_batches           | true    |
-| datafusion.execution.coalesce_target_batch_size | 4096    |
 | datafusion.execution.time_zone                  | UTC     |
 | datafusion.explain.logical_plan_only            | false   |
 | datafusion.explain.physical_plan_only           | false   |
diff --git a/docs/source/user-guide/sql/information_schema.md b/docs/source/user-guide/sql/information_schema.md
index b3fcc843bd..ced6117349 100644
--- a/docs/source/user-guide/sql/information_schema.md
+++ b/docs/source/user-guide/sql/information_schema.md
@@ -62,7 +62,6 @@ To show the current session configuration options, use the `SHOW ALL` command or
 +-------------------------------------------------+---------+
 | datafusion.execution.batch_size                 | 8192    |
 | datafusion.execution.coalesce_batches           | true    |
-| datafusion.execution.coalesce_target_batch_size | 4096    |
 | datafusion.execution.time_zone                  | UTC     |
 | datafusion.explain.logical_plan_only            | false   |
 | datafusion.explain.physical_plan_only           | false   |