You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/07/26 10:27:18 UTC
[arrow-datafusion-python] branch master updated: Add `with_column` to dataframe (#10)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/master by this push:
new 1d221b2 Add `with_column` to dataframe (#10)
1d221b2 is described below
commit 1d221b2e6c8c217aef4619d9c5a59231b48f7ad3
Author: cadl <ct...@gmail.com>
AuthorDate: Tue Jul 26 18:27:13 2022 +0800
Add `with_column` to dataframe (#10)
* feat: add df.with_column()
* chore: ignore python dist files
---
.gitignore | 13 +++++++++++++
datafusion/tests/test_dataframe.py | 15 +++++++++++++++
src/dataframe.rs | 5 +++++
3 files changed, 33 insertions(+)
diff --git a/.gitignore b/.gitignore
index 64f40ab..b57efb7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,16 @@
target
Cargo.lock
.idea
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+.python-version
diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py
index 43c260a..bed0a91 100644
--- a/datafusion/tests/test_dataframe.py
+++ b/datafusion/tests/test_dataframe.py
@@ -103,6 +103,21 @@ def test_limit(df):
assert len(result.column(1)) == 1
+def test_with_column(df):
+ df = df.with_column("c", column("a") + column("b"))
+
+ # execute and collect the first (and only) batch
+ result = df.collect()[0]
+
+ assert result.schema.field(0).name == "a"
+ assert result.schema.field(1).name == "b"
+ assert result.schema.field(2).name == "c"
+
+ assert result.column(0) == pa.array([1, 2, 3])
+ assert result.column(1) == pa.array([4, 5, 6])
+ assert result.column(2) == pa.array([5, 7, 9])
+
+
def test_udf(df):
# is_null is a pa function over arrays
is_null = udf(
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 983e13b..80963f7 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -87,6 +87,11 @@ impl PyDataFrame {
Ok(Self::new(df))
}
+ fn with_column(&self, name: &str, expr: PyExpr) -> PyResult<Self> {
+ let df = self.df.with_column(name, expr.into())?;
+ Ok(Self::new(df))
+ }
+
fn aggregate(&self, group_by: Vec<PyExpr>, aggs: Vec<PyExpr>) -> PyResult<Self> {
let group_by = group_by.into_iter().map(|e| e.into()).collect();
let aggs = aggs.into_iter().map(|e| e.into()).collect();