You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2023/01/12 17:41:51 UTC

[arrow-datafusion] branch master updated: Minor: Add docstrings to UnionExec (#4884)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 3b86643ab Minor: Add docstrings to UnionExec (#4884)
3b86643ab is described below

commit 3b86643ab966ce34f1c69fea6f4070437e27ff82
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Thu Jan 12 18:41:44 2023 +0100

    Minor: Add docstrings to UnionExec (#4884)
---
 datafusion/core/src/physical_plan/union.rs | 40 ++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/src/physical_plan/union.rs b/datafusion/core/src/physical_plan/union.rs
index 9ffea72e8..921a0d99f 100644
--- a/datafusion/core/src/physical_plan/union.rs
+++ b/datafusion/core/src/physical_plan/union.rs
@@ -50,7 +50,43 @@ use crate::{
 use datafusion_physical_expr::sort_expr_list_eq_strict_order;
 use tokio::macros::support::thread_rng_n;
 
-/// UNION ALL execution plan
+/// `UnionExec`: `UNION ALL` execution plan.
+///
+/// `UnionExec` combines multiple inputs with the same schema by
+/// concatenating the partitions.  It does not mix or copy data within
+/// or across partitions. Thus if the input partitions are sorted, the
+/// output partitions of the union are also sorted.
+///
+/// For example, given a `UnionExec` of two inputs, with `N`
+/// partitions, and `M` partitions, there will be `N+M` output
+/// partitions. The first `N` output partitions are from Input 1
+/// partitions, and then next `M` output partitions are from Input 2.
+///
+/// ```text
+///                       ▲       ▲           ▲         ▲
+///                       │       │           │         │
+///     Output            │  ...  │           │         │
+///   Partitions          │0      │N-1        │ N       │N+M-1
+///(passes through   ┌────┴───────┴───────────┴─────────┴───┐
+/// the N+M input    │              UnionExec               │
+///  partitions)     │                                      │
+///                  └──────────────────────────────────────┘
+///                                      ▲
+///                                      │
+///                                      │
+///       Input           ┌────────┬─────┴────┬──────────┐
+///     Partitions        │ ...    │          │     ...  │
+///                    0  │        │ N-1      │ 0        │  M-1
+///                  ┌────┴────────┴───┐  ┌───┴──────────┴───┐
+///                  │                 │  │                  │
+///                  │                 │  │                  │
+///                  │                 │  │                  │
+///                  │                 │  │                  │
+///                  │                 │  │                  │
+///                  │                 │  │                  │
+///                  │Input 1          │  │Input 2           │
+///                  └─────────────────┘  └──────────────────┘
+/// ```
 #[derive(Debug)]
 pub struct UnionExec {
     /// Input execution plan
@@ -158,7 +194,7 @@ impl ExecutionPlan for UnionExec {
 
     /// Specifies whether this plan generates an infinite stream of records.
     /// If the plan does not support pipelining, but it its input(s) are
-    /// infinite, returns an error to indicate this.    
+    /// infinite, returns an error to indicate this.
     fn unbounded_output(&self, children: &[bool]) -> Result<bool> {
         Ok(children.iter().any(|x| *x))
     }