You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ho...@apache.org on 2021/11/12 04:54:23 UTC
[arrow-datafusion] branch master updated: Dataframe supports except
and update readme (#1261)
This is an automated email from the ASF dual-hosted git repository.
houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 0798ca4 Dataframe supports except and update readme (#1261)
0798ca4 is described below
commit 0798ca496801c13fe40587946d164f9969584e8d
Author: Carlos <wx...@gmail.com>
AuthorDate: Fri Nov 12 12:54:18 2021 +0800
Dataframe supports except and update readme (#1261)
---
README.md | 6 +++---
datafusion/src/dataframe.rs | 15 +++++++++++++++
datafusion/src/execution/dataframe_impl.rs | 23 +++++++++++++++++++++++
3 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 7807f26..5ca0802 100644
--- a/README.md
+++ b/README.md
@@ -215,13 +215,13 @@ DataFusion also includes a simple command-line interactive SQL utility. See the
- [ ] Lists
- [x] Subqueries
- [x] Common table expressions
-- [ ] Set Operations
+- [x] Set Operations
- [x] UNION ALL
- [x] UNION
- [x] INTERSECT
- [x] INTERSECT ALL
- - [ ] EXCEPT
- - [ ] EXCEPT ALL
+ - [x] EXCEPT
+ - [x] EXCEPT ALL
- [x] Joins
- [x] INNER JOIN
- [x] LEFT JOIN
diff --git a/datafusion/src/dataframe.rs b/datafusion/src/dataframe.rs
index 7f4a3e5..c8c5dcc 100644
--- a/datafusion/src/dataframe.rs
+++ b/datafusion/src/dataframe.rs
@@ -390,4 +390,19 @@ pub trait DataFrame: Send + Sync {
/// # }
/// ```
fn intersect(&self, dataframe: Arc<dyn DataFrame>) -> Result<Arc<dyn DataFrame>>;
+
+ /// Calculate the set difference (SQL `EXCEPT`) of two [`DataFrame`]s, with `self` as the left input and `dataframe` as the right. The two [`DataFrame`]s must have exactly the same schema
+ ///
+ /// ```
+ /// # use datafusion::prelude::*;
+ /// # use datafusion::error::Result;
+ /// # #[tokio::main]
+ /// # async fn main() -> Result<()> {
+ /// let mut ctx = ExecutionContext::new();
+ /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
+ /// let df = df.except(df.clone())?;
+ /// # Ok(())
+ /// # }
+ /// ```
+ fn except(&self, dataframe: Arc<dyn DataFrame>) -> Result<Arc<dyn DataFrame>>;
}
diff --git a/datafusion/src/execution/dataframe_impl.rs b/datafusion/src/execution/dataframe_impl.rs
index c202e19..f565f5c 100644
--- a/datafusion/src/execution/dataframe_impl.rs
+++ b/datafusion/src/execution/dataframe_impl.rs
@@ -240,6 +240,15 @@ impl DataFrame for DataFrameImpl {
&LogicalPlanBuilder::intersect(left_plan, right_plan, true)?,
)))
}
+
+ fn except(&self, dataframe: Arc<dyn DataFrame>) -> Result<Arc<dyn DataFrame>> { // wraps an EXCEPT logical plan in a new DataFrameImpl
+ let left_plan = self.to_logical_plan(); // this DataFrame is the left input
+ let right_plan = dataframe.to_logical_plan(); // the argument is the right input
+ Ok(Arc::new(DataFrameImpl::new(
+ self.ctx_state.clone(), // the result is created with a clone of this DataFrame's context state
+ &LogicalPlanBuilder::except(left_plan, right_plan, true)?, // NOTE(review): `true` presumably selects the ALL variant (the `except` test expects an `EXCEPT ALL` plan) — confirm against LogicalPlanBuilder; `?` propagates a plan-building error
+ )))
+ }
}
#[cfg(test)]
@@ -461,6 +470,20 @@ mod tests {
Ok(())
}
+ #[tokio::test]
+ async fn except() -> Result<()> { // checks that the DataFrame `except` call and the SQL text below produce the same logical plan
+ let df = test_table().await?.select_columns(&["c1", "c3"])?;
+ let plan = df.except(df.clone())?; // the same DataFrame is used as both inputs
+ let result = plan.to_logical_plan();
+ let expected = create_plan(
+ "SELECT c1, c3 FROM aggregate_test_100
+ EXCEPT ALL SELECT c1, c3 FROM aggregate_test_100",
+ )
+ .await?;
+ assert_same_plan(&result, &expected); // compares the Debug-formatted strings of the two plans
+ Ok(())
+ }
+
/// Compare the formatted string representation of two plans for equality
fn assert_same_plan(plan1: &LogicalPlan, plan2: &LogicalPlan) {
assert_eq!(format!("{:?}", plan1), format!("{:?}", plan2));