You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2023/01/12 17:41:51 UTC
[arrow-datafusion] branch master updated: Minor: Add docstrings to UnionExec (#4884)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 3b86643ab Minor: Add docstrings to UnionExec (#4884)
3b86643ab is described below
commit 3b86643ab966ce34f1c69fea6f4070437e27ff82
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Thu Jan 12 18:41:44 2023 +0100
Minor: Add docstrings to UnionExec (#4884)
---
datafusion/core/src/physical_plan/union.rs | 40 ++++++++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/datafusion/core/src/physical_plan/union.rs b/datafusion/core/src/physical_plan/union.rs
index 9ffea72e8..921a0d99f 100644
--- a/datafusion/core/src/physical_plan/union.rs
+++ b/datafusion/core/src/physical_plan/union.rs
@@ -50,7 +50,43 @@ use crate::{
use datafusion_physical_expr::sort_expr_list_eq_strict_order;
use tokio::macros::support::thread_rng_n;
-/// UNION ALL execution plan
+/// `UnionExec`: `UNION ALL` execution plan.
+///
+/// `UnionExec` combines multiple inputs with the same schema by
+/// concatenating the partitions. It does not mix or copy data within
+/// or across partitions. Thus if the input partitions are sorted, the
+/// output partitions of the union are also sorted.
+///
+/// For example, given a `UnionExec` of two inputs, with `N`
+/// partitions, and `M` partitions, there will be `N+M` output
+/// partitions. The first `N` output partitions are from Input 1
+/// partitions, and the next `M` output partitions are from Input 2.
+///
+/// ```text
+/// ▲ ▲ ▲ ▲
+/// │ │ │ │
+/// Output │ ... │ │ │
+/// Partitions │0 │N-1 │ N │N+M-1
+///(passes through ┌────┴───────┴───────────┴─────────┴───┐
+/// the N+M input │ UnionExec │
+/// partitions) │ │
+/// └──────────────────────────────────────┘
+/// ▲
+/// │
+/// │
+/// Input ┌────────┬─────┴────┬──────────┐
+/// Partitions │ ... │ │ ... │
+/// 0 │ │ N-1 │ 0 │ M-1
+/// ┌────┴────────┴───┐ ┌───┴──────────┴───┐
+/// │ │ │ │
+/// │ │ │ │
+/// │ │ │ │
+/// │ │ │ │
+/// │ │ │ │
+/// │ │ │ │
+/// │Input 1 │ │Input 2 │
+/// └─────────────────┘ └──────────────────┘
+/// ```
#[derive(Debug)]
pub struct UnionExec {
/// Input execution plan
@@ -158,7 +194,7 @@ impl ExecutionPlan for UnionExec {
/// Specifies whether this plan generates an infinite stream of records.
/// If the plan does not support pipelining, but its input(s) are
- /// infinite, returns an error to indicate this.
+ /// infinite, returns an error to indicate this.
fn unbounded_output(&self, children: &[bool]) -> Result<bool> {
    // The result is `true` as soon as at least one of the child flags is
    // `true`; an empty slice of children yields `false`.
    let any_child_unbounded = children.contains(&true);
    Ok(any_child_unbounded)
}