You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/11/04 10:27:15 UTC
(arrow-datafusion) branch main updated: Improve comments for `PartitionSearchMode` struct (#8047)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 2af326a0bd Improve comments for `PartitionSearchMode` struct (#8047)
2af326a0bd is described below
commit 2af326a0bdbb60e7d7420089c7aa9ff22c7319ee
Author: Mehmet Ozan Kabak <oz...@gmail.com>
AuthorDate: Sat Nov 4 13:27:10 2023 +0300
Improve comments for `PartitionSearchMode` struct (#8047)
* Improve comments
* Make comments partition/group agnostic
---
.../physical-plan/src/joins/nested_loop_join.rs | 2 +-
datafusion/physical-plan/src/lib.rs | 6 ++---
datafusion/physical-plan/src/windows/mod.rs | 27 ++++++++++++++--------
3 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs
index 5a77ed6e29..6951642ff8 100644
--- a/datafusion/physical-plan/src/joins/nested_loop_join.rs
+++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs
@@ -48,9 +48,9 @@ use datafusion_common::{exec_err, DataFusionError, JoinSide, Result, Statistics}
use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
use datafusion_execution::TaskContext;
use datafusion_expr::JoinType;
+use datafusion_physical_expr::equivalence::join_equivalence_properties;
use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortExpr};
-use datafusion_physical_expr::equivalence::join_equivalence_properties;
use futures::{ready, Stream, StreamExt, TryStreamExt};
/// Data of the inner table side
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index 694a6d3e5c..9519f6a5a1 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -208,9 +208,9 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
EquivalenceProperties::new(self.schema())
}
- /// Get a list of `ExecutionPlan` that provide input for this plan. The
- /// returned list will be empty for leaf nodes such as scans, will contain a
- /// single value for unary nodes, or two values for binary nodes (such as
+ /// Get a list of children `ExecutionPlan`s that act as inputs to this plan.
+ /// The returned list will be empty for leaf nodes such as scans, will contain
+ /// a single value for unary nodes, or two values for binary nodes (such as
/// joins).
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>>;
diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs
index 26dddc4ddd..b6ed6e482f 100644
--- a/datafusion/physical-plan/src/windows/mod.rs
+++ b/datafusion/physical-plan/src/windows/mod.rs
@@ -55,19 +55,26 @@ pub use datafusion_physical_expr::window::{
};
#[derive(Debug, Clone, PartialEq)]
-/// Specifies partition expression properties in terms of existing ordering(s).
-/// As an example if existing ordering is [a ASC, b ASC, c ASC],
-/// `PARTITION BY b` will have `PartitionSearchMode::Linear`.
-/// `PARTITION BY a, c` and `PARTITION BY c, a` will have `PartitionSearchMode::PartiallySorted(0)`, `PartitionSearchMode::PartiallySorted(1)`
-/// respectively (subset `a` defines an ordered section. Indices points to index of `a` among partition by expressions).
-/// `PARTITION BY a, b` and `PARTITION BY b, a` will have `PartitionSearchMode::Sorted` mode.
+/// Specifies aggregation grouping and/or window partitioning properties of a
+/// set of expressions in terms of the existing ordering.
+/// For example, if the existing ordering is `[a ASC, b ASC, c ASC]`:
+/// - A `PARTITION BY b` clause will result in `Linear` mode.
+/// - A `PARTITION BY a, c` or a `PARTITION BY c, a` clause will result in
+/// `PartiallySorted([0])` or `PartiallySorted([1])` modes, respectively.
+/// The vector stores the index of `a` in the respective PARTITION BY expression.
+/// - A `PARTITION BY a, b` or a `PARTITION BY b, a` clause will result in
+/// `Sorted` mode.
+/// Note that the examples above are applicable for `GROUP BY` clauses too.
pub enum PartitionSearchMode {
- /// None of the partition expressions is ordered.
+ /// There is no partial permutation of the expressions satisfying the
+ /// existing ordering.
Linear,
- /// A non-empty subset of the the partition expressions are ordered.
- /// Indices stored constructs ordered subset, that is satisfied by existing ordering(s).
+ /// There is a partial permutation of the expressions satisfying the
+ /// existing ordering. Indices describing the longest partial permutation
+ /// are stored in the vector.
PartiallySorted(Vec<usize>),
- /// All Partition expressions are ordered (Also empty case)
+ /// There is a (full) permutation of the expressions satisfying the
+ /// existing ordering.
Sorted,
}