You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "jackwener (via GitHub)" <gi...@apache.org> on 2023/04/18 13:32:05 UTC

[GitHub] [arrow-datafusion] jackwener commented on a diff in pull request #6039: fix: incorrect column pruning in sql with window operations

jackwener commented on code in PR #6039:
URL: https://github.com/apache/arrow-datafusion/pull/6039#discussion_r1170040909


##########
datafusion/optimizer/src/push_down_projection.rs:
##########
@@ -312,11 +312,31 @@ impl OptimizerRule for PushDownProjection {
 
                 if new_window_expr.is_empty() {
                     // no columns in the window expr are needed, remove the window expr
-                    let new_window = window.input.as_ref().clone();
+                    let new_window = window.input.clone();
+
+                    let new_window = restrict_outputs(&new_window, &required_columns)?
+                        .unwrap_or((*new_window).clone());

Review Comment:
   ```suggestion
                       let input = window.input.as_ref();
   
                       let new_input = restrict_outputs(input, &required_columns)?
                           .unwrap_or(input.clone());
   ```



##########
datafusion/optimizer/src/push_down_projection.rs:
##########
@@ -312,11 +312,31 @@ impl OptimizerRule for PushDownProjection {
 
                 if new_window_expr.is_empty() {
                     // no columns in the window expr are needed, remove the window expr
-                    let new_window = window.input.as_ref().clone();
+                    let new_window = window.input.clone();
+
+                    let new_window = restrict_outputs(&new_window, &required_columns)?
+                        .unwrap_or((*new_window).clone());
 
                     generate_plan!(projection_is_empty, plan, new_window)
                 } else {
-                    let new_window = LogicalPlanBuilder::from((*(window.input)).clone())
+                    let mut referenced_inputs = HashSet::new();
+                    exprlist_to_columns(&new_window_expr, &mut referenced_inputs)?;
+                    window
+                        .input
+                        .schema()
+                        .fields()
+                        .iter()
+                        .filter(|f| required_columns.contains(&f.qualified_column()))
+                        .for_each(|f| {
+                            referenced_inputs.insert(f.qualified_column());
+                        });
+
+                    let window_child = window.input.clone();
+                    let new_window_child =
+                        restrict_outputs(&window_child, &referenced_inputs)?
+                            .unwrap_or((*window_child).clone());
+
+                    let new_window = LogicalPlanBuilder::from(new_window_child)

Review Comment:
   ```suggestion
                       let input = window.input.as_ref();
                       let new_input = restrict_outputs(input, &referenced_inputs)?
                           .unwrap_or(input.clone());
   
                       let new_window = LogicalPlanBuilder::from(new_input)
   ```



##########
datafusion/optimizer/src/push_down_projection.rs:
##########
@@ -553,6 +573,21 @@ fn push_down_scan(
     }))
 }
 
+fn restrict_outputs(
+    plan: &Arc<LogicalPlan>,

Review Comment:
   ```suggestion
       plan: &LogicalPlan,
   ```



##########
datafusion/optimizer/src/push_down_projection.rs:
##########
@@ -553,6 +573,21 @@ fn push_down_scan(
     }))
 }
 
+fn restrict_outputs(
+    plan: &Arc<LogicalPlan>,
+    permitted_outputs: &HashSet<Column>,
+) -> Result<Option<LogicalPlan>> {
+    let schema = plan.schema();
+    if permitted_outputs.len() == schema.fields().len() {
+        return Ok(None);
+    }
+    Ok(Some(generate_projection(
+        permitted_outputs,
+        schema,
+        plan.clone(),

Review Comment:
   ```suggestion
           Arc::new(plan.clone()),
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org