You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "tustvold (via GitHub)" <gi...@apache.org> on 2023/04/09 08:08:52 UTC

[GitHub] [arrow-datafusion] tustvold commented on a diff in pull request #5889: Clean up SortExec creation and add doc comments

tustvold commented on code in PR #5889:
URL: https://github.com/apache/arrow-datafusion/pull/5889#discussion_r1161244363


##########
datafusion/core/src/physical_plan/sorts/sort.rs:
##########
@@ -628,42 +631,74 @@ pub struct SortExec {
     expr: Vec<PhysicalSortExpr>,
     /// Containing all metrics set created during sort
     metrics_set: CompositeMetricsSet,
-    /// Preserve partitions of input plan
+    /// Preserve partitions of input plan. If false, the input partitions
+    /// will be sorted and merged into a single output partition.
     preserve_partitioning: bool,
     /// Fetch highest/lowest n results
     fetch: Option<usize>,
 }
 
 impl SortExec {
     /// Create a new sort execution plan
+    #[deprecated(since = "22.0.0", note = "use `new` and `with_fetch`")]
     pub fn try_new(
         expr: Vec<PhysicalSortExpr>,
         input: Arc<dyn ExecutionPlan>,
         fetch: Option<usize>,
     ) -> Result<Self> {
-        Ok(Self::new_with_partitioning(expr, input, false, fetch))
+        Ok(Self::new(expr, input).with_fetch(fetch))
     }
 
-    /// Whether this `SortExec` preserves partitioning of the children
-    pub fn preserve_partitioning(&self) -> bool {
-        self.preserve_partitioning
+    /// Create a new sort execution plan that produces a single,
+    /// sorted output partition.
+    pub fn new(expr: Vec<PhysicalSortExpr>, input: Arc<dyn ExecutionPlan>) -> Self {
+        Self {
+            expr,
+            input,
+            metrics_set: CompositeMetricsSet::new(),
+            preserve_partitioning: false,
+            fetch: None,
+        }
     }
 
     /// Create a new sort execution plan with the option to preserve
     /// the partitioning of the input plan
+    #[deprecated(
+        since = "22.0.0",
+        note = "use `new`, `with_fetch` and `with_preserve_partioning` instead"
+    )]
     pub fn new_with_partitioning(
         expr: Vec<PhysicalSortExpr>,
         input: Arc<dyn ExecutionPlan>,
         preserve_partitioning: bool,
         fetch: Option<usize>,
     ) -> Self {
-        Self {
-            expr,
-            input,
-            metrics_set: CompositeMetricsSet::new(),
-            preserve_partitioning,
-            fetch,
-        }
+        Self::new(expr, input)
+            .with_fetch(fetch)
+            .with_preserve_partitioning(preserve_partitioning)
+    }
+
+    /// Whether this `SortExec` preserves partitioning of the children
+    pub fn preserve_partitioning(&self) -> bool {
+        self.preserve_partitioning
+    }
+
+    /// Specify the partitioning behavior of this sort exec
+    ///
+    /// If `preserve_partitioning` is true, sorts each partition
+    /// individually, producing one sorted strema for each input partition.
+    ///
+    /// If `preserve_partitioning` is false, sorts and merges all
+    /// input partitions producing a single, sorted partition.
+    pub fn with_preserve_partitioning(mut self, preserve_partitioning: bool) -> Self {
+        self.preserve_partitioning = preserve_partitioning;
+        self
+    }
+
+    /// Whether this `SortExec` preserves partitioning of the children
+    pub fn with_fetch(mut self, fetch: Option<usize>) -> Self {

Review Comment:
   I wonder if limit might be a more standard name for this?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org