You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2020/12/02 19:34:44 UTC

[arrow] branch master updated: ARROW-10789: [Rust][DataFusion] Make TableProvider dynamically typed

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 96067d0  ARROW-10789: [Rust][DataFusion] Make TableProvider dynamically typed
96067d0 is described below

commit 96067d0c639d32d7645b7168186bf5dbe525578b
Author: rdettai <rd...@gmail.com>
AuthorDate: Wed Dec 2 14:32:23 2020 -0500

    ARROW-10789: [Rust][DataFusion] Make TableProvider dynamically typed
    
    > The `TableProvider` trait can be used to provide custom data sources to the query plan. For use cases such as plan serialization, it is useful to be able to downcast a provider to its concrete implementation, the same way it is done for the `ExecutionPlan` trait.
    
    https://issues.apache.org/jira/browse/ARROW-10789
    
    Closes #8819 from rdettai/ARROW-10789-dyn-tableprovider
    
    Authored-by: rdettai <rd...@gmail.com>
    Signed-off-by: Andrew Lamb <an...@nerdnetworks.org>
---
 rust/datafusion/src/datasource/csv.rs        | 5 +++++
 rust/datafusion/src/datasource/datasource.rs | 5 +++++
 rust/datafusion/src/datasource/memory.rs     | 5 +++++
 rust/datafusion/src/datasource/parquet.rs    | 5 +++++
 rust/datafusion/tests/dataframe.rs           | 5 ++++-
 5 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/rust/datafusion/src/datasource/csv.rs b/rust/datafusion/src/datasource/csv.rs
index b0317c1..351eaff 100644
--- a/rust/datafusion/src/datasource/csv.rs
+++ b/rust/datafusion/src/datasource/csv.rs
@@ -34,6 +34,7 @@
 //! ```
 
 use arrow::datatypes::SchemaRef;
+use std::any::Any;
 use std::string::String;
 use std::sync::Arc;
 
@@ -79,6 +80,10 @@ impl CsvFile {
 }
 
 impl TableProvider for CsvFile {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
     fn schema(&self) -> SchemaRef {
         self.schema.clone()
     }
diff --git a/rust/datafusion/src/datasource/datasource.rs b/rust/datafusion/src/datasource/datasource.rs
index 3b1cf0c..e7371ea 100644
--- a/rust/datafusion/src/datasource/datasource.rs
+++ b/rust/datafusion/src/datasource/datasource.rs
@@ -17,6 +17,7 @@
 
 //! Data source traits
 
+use std::any::Any;
 use std::sync::Arc;
 
 use crate::arrow::datatypes::SchemaRef;
@@ -25,6 +26,10 @@ use crate::physical_plan::ExecutionPlan;
 
 /// Source table
 pub trait TableProvider {
+    /// Returns the table provider as [`Any`](std::any::Any) so that it can be
+    /// downcast to a specific implementation.
+    fn as_any(&self) -> &dyn Any;
+
     /// Get a reference to the schema for this table
     fn schema(&self) -> SchemaRef;
 
diff --git a/rust/datafusion/src/datasource/memory.rs b/rust/datafusion/src/datasource/memory.rs
index 19efa0a..8fa140b 100644
--- a/rust/datafusion/src/datasource/memory.rs
+++ b/rust/datafusion/src/datasource/memory.rs
@@ -19,6 +19,7 @@
 //! queried by DataFusion. This allows data to be pre-loaded into memory and then
 //! repeatedly queried without incurring additional file I/O overhead.
 
+use std::any::Any;
 use std::sync::Arc;
 
 use arrow::datatypes::{Field, Schema, SchemaRef};
@@ -85,6 +86,10 @@ impl MemTable {
 }
 
 impl TableProvider for MemTable {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
     fn schema(&self) -> SchemaRef {
         self.schema.clone()
     }
diff --git a/rust/datafusion/src/datasource/parquet.rs b/rust/datafusion/src/datasource/parquet.rs
index eb4e0be..be65e63 100644
--- a/rust/datafusion/src/datasource/parquet.rs
+++ b/rust/datafusion/src/datasource/parquet.rs
@@ -17,6 +17,7 @@
 
 //! Parquet data source
 
+use std::any::Any;
 use std::string::String;
 use std::sync::Arc;
 
@@ -46,6 +47,10 @@ impl ParquetTable {
 }
 
 impl TableProvider for ParquetTable {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
     /// Get the schema for this parquet file.
     fn schema(&self) -> SchemaRef {
         self.schema.clone()
diff --git a/rust/datafusion/tests/dataframe.rs b/rust/datafusion/tests/dataframe.rs
index db31a79..d17deff 100644
--- a/rust/datafusion/tests/dataframe.rs
+++ b/rust/datafusion/tests/dataframe.rs
@@ -128,11 +128,14 @@ impl ExecutionPlan for CustomExecutionPlan {
 }
 
 impl TableProvider for CustomTableProvider {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
     fn schema(&self) -> SchemaRef {
         TEST_CUSTOM_SCHEMA_REF!()
     }
 
-    /// Create an ExecutionPlan that will scan the table.
     fn scan(
         &self,
         projection: &Option<Vec<usize>>,