You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ho...@apache.org on 2021/08/30 01:28:02 UTC

[arrow-datafusion] branch master updated: [Python] - Support show function for DataFrame api of python library (#942)

This is an automated email from the ASF dual-hosted git repository.

houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 775477f  [Python] - Support show function for DataFrame api of python library (#942)
775477f is described below

commit 775477f0377584733f86e7a8f7793fb1fad6a27f
Author: Francis Du <fr...@francisdu.com>
AuthorDate: Mon Aug 30 09:27:55 2021 +0800

    [Python] - Support show function for DataFrame api of python library (#942)
    
    * feat: Add show function for python library
    
    * Update python/src/dataframe.rs
    
    Co-authored-by: Andy Grove <an...@gmail.com>
---
 python/src/dataframe.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/python/src/dataframe.rs b/python/src/dataframe.rs
index 4a50262..8e5657b 100644
--- a/python/src/dataframe.rs
+++ b/python/src/dataframe.rs
@@ -28,6 +28,7 @@ use datafusion::{execution::context::ExecutionContextState, logical_plan};
 
 use crate::{errors, to_py};
 use crate::{errors::DataFusionError, expression};
+use datafusion::arrow::util::pretty;
 
 /// A DataFrame is a representation of a logical plan and an API to compose statements.
 /// Use it to build a plan and `.collect()` to execute the plan and collect the result.
@@ -139,6 +140,28 @@ impl DataFrame {
         to_py::to_py(&batches)
     }
 
+    /// Print the result to stdout as a table, limited to `num` rows (20 by default).
+    #[args(num = "20")]
+    fn show(&self, py: Python, num: usize) -> PyResult<()> {
+        let ctx = _ExecutionContext::from(self.ctx_state.clone());
+        let plan = ctx
+            .optimize(&self.limit(num)?.plan)
+            .and_then(|plan| ctx.create_physical_plan(&plan))
+            .map_err(|e| -> errors::DataFusionError { e.into() })?;
+        // Execute the plan on a new runtime, with the GIL released while it runs.
+        let rt = Runtime::new().unwrap();
+        let batches = py.allow_threads(|| {
+            rt.block_on(async {
+                collect(plan)
+                    .await
+                    .map_err(|e| -> errors::DataFusionError { e.into() })
+            })
+        })?;
+        // Raise a Python exception if formatting fails instead of panicking.
+        pretty::print_batches(&batches)
+            .map_err(|e| pyo3::exceptions::PyException::new_err(e.to_string()))
+    }
+
+
     /// Returns the join of two DataFrames `on`.
     fn join(&self, right: &DataFrame, on: Vec<&str>, how: &str) -> PyResult<Self> {
         let builder = LogicalPlanBuilder::from(self.plan.clone());