You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ho...@apache.org on 2021/08/30 01:28:02 UTC
[arrow-datafusion] branch master updated: [Python] - Support show
function for DataFrame api of python library (#942)
This is an automated email from the ASF dual-hosted git repository.
houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 775477f [Python] - Support show function for DataFrame api of python library (#942)
775477f is described below
commit 775477f0377584733f86e7a8f7793fb1fad6a27f
Author: Francis Du <fr...@francisdu.com>
AuthorDate: Mon Aug 30 09:27:55 2021 +0800
[Python] - Support show function for DataFrame api of python library (#942)
* feat: Add show function for python library
* Update python/src/dataframe.rs
Co-authored-by: Andy Grove <an...@gmail.com>
---
python/src/dataframe.rs | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/python/src/dataframe.rs b/python/src/dataframe.rs
index 4a50262..8e5657b 100644
--- a/python/src/dataframe.rs
+++ b/python/src/dataframe.rs
@@ -28,6 +28,7 @@ use datafusion::{execution::context::ExecutionContextState, logical_plan};
use crate::{errors, to_py};
use crate::{errors::DataFusionError, expression};
+use datafusion::arrow::util::pretty;
/// A DataFrame is a representation of a logical plan and an API to compose statements.
/// Use it to build a plan and `.collect()` to execute the plan and collect the result.
@@ -139,6 +140,28 @@ impl DataFrame {
to_py::to_py(&batches)
}
+    /// Execute this DataFrame's plan with a limit of `num` (20 by default) and
+    /// pretty-print the resulting record batches.
+    ///
+    /// # Errors
+    /// Returns a Python exception if applying the limit, optimizing or planning
+    /// the query, executing it, or formatting the output fails.
+    ///
+    /// # Panics
+    /// Panics if the runtime cannot be created (`Runtime::new().unwrap()`).
+    #[args(num = "20")]
+    fn show(&self, py: Python, num: usize) -> PyResult<()> {
+        let ctx = _ExecutionContext::from(self.ctx_state.clone());
+        let plan = ctx
+            .optimize(&self.limit(num)?.plan)
+            .and_then(|plan| ctx.create_physical_plan(&plan))
+            .map_err(|e| -> errors::DataFusionError { e.into() })?;
+
+        let rt = Runtime::new().unwrap();
+        // Run the query inside `allow_threads` so the GIL is not held while it executes.
+        let batches = py.allow_threads(|| {
+            rt.block_on(async {
+                collect(plan)
+                    .await
+                    .map_err(|e| -> errors::DataFusionError { e.into() })
+            })
+        })?;
+
+        // Surface formatting failures to the Python caller instead of panicking.
+        pretty::print_batches(&batches)
+            .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))
+    }
+
+
/// Returns the join of two DataFrames `on`.
fn join(&self, right: &DataFrame, on: Vec<&str>, how: &str) -> PyResult<Self> {
let builder = LogicalPlanBuilder::from(self.plan.clone());