You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/11/04 10:27:15 UTC

(arrow-datafusion) branch main updated: Improve comments for `PartitionSearchMode` struct (#8047)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2af326a0bd Improve comments for `PartitionSearchMode` struct (#8047)
2af326a0bd is described below

commit 2af326a0bdbb60e7d7420089c7aa9ff22c7319ee
Author: Mehmet Ozan Kabak <oz...@gmail.com>
AuthorDate: Sat Nov 4 13:27:10 2023 +0300

    Improve comments for `PartitionSearchMode` struct (#8047)
    
    * Improve comments
    
    * Make comments partition/group agnostic
---
 .../physical-plan/src/joins/nested_loop_join.rs    |  2 +-
 datafusion/physical-plan/src/lib.rs                |  6 ++---
 datafusion/physical-plan/src/windows/mod.rs        | 27 ++++++++++++++--------
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs
index 5a77ed6e29..6951642ff8 100644
--- a/datafusion/physical-plan/src/joins/nested_loop_join.rs
+++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs
@@ -48,9 +48,9 @@ use datafusion_common::{exec_err, DataFusionError, JoinSide, Result, Statistics}
 use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
 use datafusion_execution::TaskContext;
 use datafusion_expr::JoinType;
+use datafusion_physical_expr::equivalence::join_equivalence_properties;
 use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortExpr};
 
-use datafusion_physical_expr::equivalence::join_equivalence_properties;
 use futures::{ready, Stream, StreamExt, TryStreamExt};
 
 /// Data of the inner table side
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index 694a6d3e5c..9519f6a5a1 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -208,9 +208,9 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
         EquivalenceProperties::new(self.schema())
     }
 
-    /// Get a list of `ExecutionPlan` that provide input for this plan. The
-    /// returned list will be empty for leaf nodes such as scans, will contain a
-    /// single value for unary nodes, or two values for binary nodes (such as
+    /// Get a list of children `ExecutionPlan`s that act as inputs to this plan.
+    /// The returned list will be empty for leaf nodes such as scans, will contain
+    /// a single value for unary nodes, or two values for binary nodes (such as
     /// joins).
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>>;
 
diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs
index 26dddc4ddd..b6ed6e482f 100644
--- a/datafusion/physical-plan/src/windows/mod.rs
+++ b/datafusion/physical-plan/src/windows/mod.rs
@@ -55,19 +55,26 @@ pub use datafusion_physical_expr::window::{
 };
 
 #[derive(Debug, Clone, PartialEq)]
-/// Specifies partition expression properties in terms of existing ordering(s).
-/// As an example if existing ordering is [a ASC, b ASC, c ASC],
-/// `PARTITION BY b` will have `PartitionSearchMode::Linear`.
-/// `PARTITION BY a, c` and `PARTITION BY c, a` will have `PartitionSearchMode::PartiallySorted(0)`, `PartitionSearchMode::PartiallySorted(1)`
-///  respectively (subset `a` defines an ordered section. Indices points to index of `a` among partition by expressions).
-/// `PARTITION BY a, b` and `PARTITION BY b, a` will have `PartitionSearchMode::Sorted` mode.
+/// Specifies aggregation grouping and/or window partitioning properties of a
+/// set of expressions in terms of the existing ordering.
+/// For example, if the existing ordering is `[a ASC, b ASC, c ASC]`:
+/// - A `PARTITION BY b` clause will result in `Linear` mode.
+/// - A `PARTITION BY a, c` or a `PARTITION BY c, a` clause will result in
+///   `PartiallySorted([0])` or `PartiallySorted([1])` modes, respectively.
+///   The vector stores the index of `a` in the respective PARTITION BY expression.
+/// - A `PARTITION BY a, b` or a `PARTITION BY b, a` clause will result in
+///   `Sorted` mode.
+/// Note that the examples above are applicable for `GROUP BY` clauses too.
 pub enum PartitionSearchMode {
-    /// None of the partition expressions is ordered.
+    /// There is no partial permutation of the expressions satisfying the
+    /// existing ordering.
     Linear,
-    /// A non-empty subset of the the partition expressions are ordered.
-    /// Indices stored constructs ordered subset, that is satisfied by existing ordering(s).
+    /// There is a partial permutation of the expressions satisfying the
+    /// existing ordering. Indices describing the longest partial permutation
+    /// are stored in the vector.
     PartiallySorted(Vec<usize>),
-    /// All Partition expressions are ordered (Also empty case)
+    /// There is a (full) permutation of the expressions satisfying the
+    /// existing ordering.
     Sorted,
 }