You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/11/02 09:07:45 UTC

[arrow-datafusion-python] branch master updated: [DataFrame] - Add cache function to DataFrame (#62)

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git


The following commit(s) were added to refs/heads/master by this push:
     new 940f118  [DataFrame] - Add cache function to DataFrame (#62)
940f118 is described below

commit 940f11888fc5ac1149513ab2004178f0360f32af
Author: Francis Du <me...@francis.run>
AuthorDate: Wed Nov 2 17:07:40 2022 +0800

    [DataFrame] - Add cache function to DataFrame (#62)
    
    * feat: add cache function to DataFrame
    
    * fix: typo
---
 datafusion/tests/test_dataframe.py | 4 ++++
 src/dataframe.rs                   | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py
index 9880b6d..51f7c22 100644
--- a/datafusion/tests/test_dataframe.py
+++ b/datafusion/tests/test_dataframe.py
@@ -381,3 +381,7 @@ def test_union_distinct(ctx):
 
     assert df_c.collect() == df_a_u_b.collect()
     assert df_c.collect() == df_a_u_b.collect()
+
+
+def test_cache(df):
+    assert df.cache().collect() == df.collect()
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 054e09f..997ba98 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -128,6 +128,12 @@ impl PyDataFrame {
         batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect()
     }
 
+    /// Cache this DataFrame as an in-memory table and return a new DataFrame that reads from it.
+    fn cache(&self, py: Python) -> PyResult<Self> {
+        let df = wait_for_future(py, self.df.cache())?;
+        Ok(Self::new(df))
+    }
+
     /// Executes this DataFrame and collects all results into a vector of vector of RecordBatch
     /// maintaining the input partitioning.
     fn collect_partitioned(&self, py: Python) -> PyResult<Vec<Vec<PyObject>>> {