You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/06/06 11:18:50 UTC

[GitHub] [arrow-datafusion] alamb commented on a change in pull request #506: Add window frame constructs - alternative

alamb commented on a change in pull request #506:
URL: https://github.com/apache/arrow-datafusion/pull/506#discussion_r646116862



##########
File path: datafusion/src/physical_plan/window_frames.rs
##########
@@ -0,0 +1,337 @@
+// Licensed to the Apache Software Foundation (ASF) under one

Review comment:
       I agree it would make more sense for this module to be in `logical_plan` (I think it would also be fine to do as a follow on PR)

##########
File path: datafusion/src/logical_plan/expr.rs
##########
@@ -1283,8 +1284,23 @@ impl fmt::Debug for Expr {
             Expr::ScalarUDF { fun, ref args, .. } => {
                 fmt_function(f, &fun.name, false, args)
             }
-            Expr::WindowFunction { fun, ref args, .. } => {
-                fmt_function(f, &fun.to_string(), false, args)
+            Expr::WindowFunction {
+                fun,
+                ref args,
+                window_frame,
+                ..
+            } => {
+                fmt_function(f, &fun.to_string(), false, args)?;
+                if let Some(window_frame) = window_frame {
+                    write!(
+                        f,
+                        " {} BETWEEN {} AND {}",

Review comment:
       Is the leading space on purpose? It seems like maybe this should be 
   
   ```suggestion
                           "{} BETWEEN {} AND {}",
   ```
   
   ?

##########
File path: datafusion/src/physical_plan/window_frames.rs
##########
@@ -0,0 +1,337 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Window frame
+//!
+//! The frame-spec determines which output rows are read by an aggregate window function. The frame-spec consists of four parts:
+//! - A frame type - either ROWS, RANGE or GROUPS,
+//! - A starting frame boundary,
+//! - An ending frame boundary,
+//! - An EXCLUDE clause.
+
+use crate::error::{DataFusionError, Result};
+use sqlparser::ast;
+use std::cmp::Ordering;
+use std::convert::{From, TryFrom};
+use std::fmt;
+
+/// The frame-spec determines which output rows are read by an aggregate window function.
+///
+/// The ending frame boundary can be omitted (if the BETWEEN and AND keywords that surround the
+/// starting frame boundary are also omitted), in which case the ending frame boundary defaults to
+/// CURRENT ROW.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct WindowFrame {
+    /// A frame type - either ROWS, RANGE or GROUPS
+    pub units: WindowFrameUnits,
+    /// A starting frame boundary
+    pub start_bound: WindowFrameBound,
+    /// An ending frame boundary
+    pub end_bound: WindowFrameBound,
+}
+
+impl fmt::Display for WindowFrame {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{} BETWEEN {} AND {}",
+            self.units, self.start_bound, self.end_bound
+        )?;
+        Ok(())
+    }
+}
+
+impl TryFrom<ast::WindowFrame> for WindowFrame {
+    type Error = DataFusionError;
+
+    fn try_from(value: ast::WindowFrame) -> Result<Self> {

Review comment:
       👍 I really like this structure

##########
File path: datafusion/src/physical_plan/window_frames.rs
##########
@@ -0,0 +1,337 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Window frame
+//!
+//! The frame-spec determines which output rows are read by an aggregate window function. The frame-spec consists of four parts:
+//! - A frame type - either ROWS, RANGE or GROUPS,
+//! - A starting frame boundary,
+//! - An ending frame boundary,
+//! - An EXCLUDE clause.
+
+use crate::error::{DataFusionError, Result};
+use sqlparser::ast;
+use std::cmp::Ordering;
+use std::convert::{From, TryFrom};
+use std::fmt;
+
+/// The frame-spec determines which output rows are read by an aggregate window function.
+///
+/// The ending frame boundary can be omitted (if the BETWEEN and AND keywords that surround the
+/// starting frame boundary are also omitted), in which case the ending frame boundary defaults to
+/// CURRENT ROW.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct WindowFrame {
+    /// A frame type - either ROWS, RANGE or GROUPS
+    pub units: WindowFrameUnits,
+    /// A starting frame boundary
+    pub start_bound: WindowFrameBound,
+    /// An ending frame boundary
+    pub end_bound: WindowFrameBound,
+}
+
+impl fmt::Display for WindowFrame {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{} BETWEEN {} AND {}",
+            self.units, self.start_bound, self.end_bound
+        )?;
+        Ok(())
+    }
+}
+
+impl TryFrom<ast::WindowFrame> for WindowFrame {
+    type Error = DataFusionError;
+
+    fn try_from(value: ast::WindowFrame) -> Result<Self> {
+        let start_bound = value.start_bound.into();
+        let end_bound = value
+            .end_bound
+            .map(WindowFrameBound::from)
+            .unwrap_or(WindowFrameBound::CurrentRow);
+
+        if let WindowFrameBound::Following(None) = start_bound {
+            Err(DataFusionError::Execution(
+                "Invalid window frame: start bound cannot be unbounded following"
+                    .to_owned(),
+            ))
+        } else if let WindowFrameBound::Preceding(None) = end_bound {
+            Err(DataFusionError::Execution(
+                "Invalid window frame: end bound cannot be unbounded preceding"
+                    .to_owned(),
+            ))
+        } else if start_bound > end_bound {
+            Err(DataFusionError::Execution(format!(
+            "Invalid window frame: start bound ({}) cannot be larger than end bound ({})",
+            start_bound, end_bound
+        )))
+        } else {
+            let units = value.units.into();
+            Ok(Self {
+                units,
+                start_bound,
+                end_bound,
+            })
+        }
+    }
+}
+
+impl Default for WindowFrame {
+    fn default() -> Self {
+        WindowFrame {
+            units: WindowFrameUnits::Range,
+            start_bound: WindowFrameBound::Preceding(None),
+            end_bound: WindowFrameBound::CurrentRow,
+        }
+    }
+}
+
+/// There are five ways to describe starting and ending frame boundaries:
+///
+/// 1. UNBOUNDED PRECEDING
+/// 2. <expr> PRECEDING

Review comment:
       👍 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org