You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/07/26 10:27:18 UTC
[arrow-datafusion-python] branch master updated: Add `with_column` to dataframe (#10)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d221b2  Add `with_column` to dataframe (#10)
1d221b2 is described below

commit 1d221b2e6c8c217aef4619d9c5a59231b48f7ad3
Author: cadl <ct...@gmail.com>
AuthorDate: Tue Jul 26 18:27:13 2022 +0800

    Add `with_column` to dataframe (#10)
    
    * feat: add df.with_column()
    
    * chore: ignore python dist files
---
 .gitignore                         | 13 +++++++++++++
 datafusion/tests/test_dataframe.py | 15 +++++++++++++++
 src/dataframe.rs                   |  5 +++++
 3 files changed, 33 insertions(+)

diff --git a/.gitignore b/.gitignore
index 64f40ab..b57efb7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,16 @@
 target
 Cargo.lock
 .idea
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+.python-version
diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py
index 43c260a..bed0a91 100644
--- a/datafusion/tests/test_dataframe.py
+++ b/datafusion/tests/test_dataframe.py
@@ -103,6 +103,21 @@ def test_limit(df):
     assert len(result.column(1)) == 1
 
 
+def test_with_column(df):
+    df = df.with_column("c", column("a") + column("b"))
+
+    # execute and collect the first (and only) batch
+    result = df.collect()[0]
+
+    assert result.schema.field(0).name == "a"
+    assert result.schema.field(1).name == "b"
+    assert result.schema.field(2).name == "c"
+
+    assert result.column(0) == pa.array([1, 2, 3])
+    assert result.column(1) == pa.array([4, 5, 6])
+    assert result.column(2) == pa.array([5, 7, 9])
+
+
 def test_udf(df):
     # is_null is a pa function over arrays
     is_null = udf(
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 983e13b..80963f7 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -87,6 +87,11 @@ impl PyDataFrame {
         Ok(Self::new(df))
     }
 
+    fn with_column(&self, name: &str, expr: PyExpr) -> PyResult<Self> {
+        let df = self.df.with_column(name, expr.into())?;
+        Ok(Self::new(df))
+    }
+
     fn aggregate(&self, group_by: Vec<PyExpr>, aggs: Vec<PyExpr>) -> PyResult<Self> {
         let group_by = group_by.into_iter().map(|e| e.into()).collect();
         let aggs = aggs.into_iter().map(|e| e.into()).collect();