You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/08/17 03:22:09 UTC

[arrow] branch master updated: ARROW-9757: [Rust] [DataFusion] Add prelude.rs

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d836ef  ARROW-9757: [Rust] [DataFusion] Add prelude.rs
4d836ef is described below

commit 4d836ef4b044b8b15ed43fbceae387b0f26bda48
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sun Aug 16 21:21:20 2020 -0600

    ARROW-9757: [Rust] [DataFusion] Add prelude.rs
    
    Users can now just add `use datafusion::prelude::*;` to bring in some key items such as `ExecutionContext` to make it easier to get started.
    
    Rustdocs updated and improved as well.
    
    Closes #7978 from andygrove/prelude
    
    Authored-by: Andy Grove <an...@gmail.com>
    Signed-off-by: Andy Grove <an...@gmail.com>
---
 rust/datafusion/examples/csv_sql.rs          |  3 +--
 rust/datafusion/examples/dataframe.rs        |  4 +--
 rust/datafusion/examples/flight_server.rs    |  2 +-
 rust/datafusion/examples/memory_table_api.rs |  3 +--
 rust/datafusion/examples/parquet_sql.rs      |  3 ++-
 rust/datafusion/src/dataframe.rs             | 35 +++++++-----------------
 rust/datafusion/src/execution/context.rs     | 31 +++++++++++----------
 rust/datafusion/src/lib.rs                   | 40 +++++++++++++++++++++++-----
 rust/datafusion/src/{lib.rs => prelude.rs}   | 36 +++++++++----------------
 9 files changed, 79 insertions(+), 78 deletions(-)

diff --git a/rust/datafusion/examples/csv_sql.rs b/rust/datafusion/examples/csv_sql.rs
index a5f3837..771d99b 100644
--- a/rust/datafusion/examples/csv_sql.rs
+++ b/rust/datafusion/examples/csv_sql.rs
@@ -17,9 +17,8 @@
 
 use arrow::util::pretty;
 
-use datafusion::datasource::csv::CsvReadOptions;
 use datafusion::error::Result;
-use datafusion::ExecutionContext;
+use datafusion::prelude::*;
 
 /// This example demonstrates executing a simple query against an Arrow data source (CSV) and
 /// fetching results
diff --git a/rust/datafusion/examples/dataframe.rs b/rust/datafusion/examples/dataframe.rs
index 4b931b6..5b45d11 100644
--- a/rust/datafusion/examples/dataframe.rs
+++ b/rust/datafusion/examples/dataframe.rs
@@ -16,9 +16,9 @@
 // under the License.
 
 use arrow::util::pretty;
+
 use datafusion::error::Result;
-use datafusion::logicalplan::{col, lit};
-use datafusion::ExecutionContext;
+use datafusion::prelude::*;
 
 /// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
 /// fetching results, using the DataFrame trait
diff --git a/rust/datafusion/examples/flight_server.rs b/rust/datafusion/examples/flight_server.rs
index c71a758..0cb2858 100644
--- a/rust/datafusion/examples/flight_server.rs
+++ b/rust/datafusion/examples/flight_server.rs
@@ -23,7 +23,7 @@ use tonic::{Request, Response, Status, Streaming};
 
 use datafusion::datasource::parquet::ParquetTable;
 use datafusion::datasource::TableProvider;
-use datafusion::execution::context::ExecutionContext;
+use datafusion::prelude::*;
 
 use arrow_flight::{
     flight_service_server::FlightService, flight_service_server::FlightServiceServer,
diff --git a/rust/datafusion/examples/memory_table_api.rs b/rust/datafusion/examples/memory_table_api.rs
index ee85785..da94761 100644
--- a/rust/datafusion/examples/memory_table_api.rs
+++ b/rust/datafusion/examples/memory_table_api.rs
@@ -25,8 +25,7 @@ use arrow::util::pretty;
 
 use datafusion::datasource::MemTable;
 use datafusion::error::Result;
-use datafusion::execution::context::ExecutionContext;
-use datafusion::logicalplan::{col, lit};
+use datafusion::prelude::*;
 
 /// This example demonstrates basic uses of the Table API on an in-memory table
 fn main() -> Result<()> {
diff --git a/rust/datafusion/examples/parquet_sql.rs b/rust/datafusion/examples/parquet_sql.rs
index f73a2ae..6359023 100644
--- a/rust/datafusion/examples/parquet_sql.rs
+++ b/rust/datafusion/examples/parquet_sql.rs
@@ -16,8 +16,9 @@
 // under the License.
 
 use arrow::util::pretty;
+
 use datafusion::error::Result;
-use datafusion::execution::context::ExecutionContext;
+use datafusion::prelude::*;
 
 /// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
 /// fetching results
diff --git a/rust/datafusion/src/dataframe.rs b/rust/datafusion/src/dataframe.rs
index 75618d1..f89f06c 100644
--- a/rust/datafusion/src/dataframe.rs
+++ b/rust/datafusion/src/dataframe.rs
@@ -35,10 +35,8 @@ use std::sync::Arc;
 /// The query can be executed by calling the `collect` method.
 ///
 /// ```
-/// # use datafusion::ExecutionContext;
+/// # use datafusion::prelude::*;
 /// # use datafusion::error::Result;
-/// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-/// # use datafusion::logicalplan::col;
 /// # fn main() -> Result<()> {
 /// let mut ctx = ExecutionContext::new();
 /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -54,9 +52,8 @@ pub trait DataFrame {
     /// specified columns.
     ///
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -69,10 +66,8 @@ pub trait DataFrame {
     /// Create a projection based on arbitrary expressions.
     ///
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-    /// # use datafusion::logicalplan::col;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -85,10 +80,8 @@ pub trait DataFrame {
     /// Filter a DataFrame to only include rows that match the specified filter expression.
     ///
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-    /// # use datafusion::logicalplan::col;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -101,10 +94,8 @@ pub trait DataFrame {
     /// Perform an aggregate query with optional grouping expressions.
     ///
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-    /// # use datafusion::logicalplan::col;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -126,10 +117,8 @@ pub trait DataFrame {
     /// Limit the number of rows returned from this DataFrame.
     ///
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-    /// # use datafusion::logicalplan::col;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -143,10 +132,8 @@ pub trait DataFrame {
     /// a sort expression by calling its [sort](../logicalplan/enum.Expr.html#method.sort) method.
     ///
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-    /// # use datafusion::logicalplan::col;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -159,10 +146,8 @@ pub trait DataFrame {
     /// Executes this DataFrame and collects all results into a vector of RecordBatch.
     ///
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-    /// # use datafusion::logicalplan::col;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -176,10 +161,8 @@ pub trait DataFrame {
     /// where each column has a name, data type, and nullability attribute.
 
     /// ```
-    /// # use datafusion::ExecutionContext;
+    /// # use datafusion::prelude::*;
     /// # use datafusion::error::Result;
-    /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-    /// # use datafusion::logicalplan::col;
     /// # fn main() -> Result<()> {
     /// let mut ctx = ExecutionContext::new();
     /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs
index 8f92aae..bafdf74 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -60,31 +60,34 @@ use crate::sql::{
 /// * Execution a SQL query
 ///
 /// The following example demonstrates how to use the context to execute a query against a CSV
-/// data source:
+/// data source using the DataFrame API:
 ///
 /// ```
-/// use datafusion::ExecutionContext;
-/// use datafusion::execution::physical_plan::csv::CsvReadOptions;
-/// use datafusion::logicalplan::col;
-///
+/// use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # fn main() -> Result<()> {
 /// let mut ctx = ExecutionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).unwrap();
-/// let df = df.filter(col("a").lt_eq(col("b"))).unwrap()
-///            .aggregate(vec![col("a")], vec![df.min(col("b")).unwrap()]).unwrap()
-///            .limit(100).unwrap();
+/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
+/// let df = df.filter(col("a").lt_eq(col("b")))?
+///            .aggregate(vec![col("a")], vec![df.min(col("b"))?])?
+///            .limit(100)?;
 /// let results = df.collect();
+/// # Ok(())
+/// # }
 /// ```
 ///
 /// The following example demonstrates how to execute the same query using SQL:
 ///
 /// ```
-/// use datafusion::ExecutionContext;
-/// use datafusion::execution::physical_plan::csv::CsvReadOptions;
-/// use datafusion::logicalplan::col;
+/// use datafusion::prelude::*;
 ///
+/// # use datafusion::error::Result;
+/// # fn main() -> Result<()> {
 /// let mut ctx = ExecutionContext::new();
-/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).unwrap();
-/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").unwrap();
+/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;
+/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;
+/// # Ok(())
+/// # }
 /// ```
 pub struct ExecutionContext {
     /// Internal state for the context
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs
index 73897ee..0b30f47 100644
--- a/rust/datafusion/src/lib.rs
+++ b/rust/datafusion/src/lib.rs
@@ -15,17 +15,46 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#![warn(missing_docs)]
+
 //! DataFusion is an extensible query execution framework that uses
 //! Apache Arrow as the memory model.
 //!
-//! DataFusion supports both SQL and a Table/DataFrame-style API for building logical query plans
+//! DataFusion supports both SQL and a DataFrame API for building logical query plans
 //! and also provides a query optimizer and execution engine capable of parallel execution
 //! against partitioned data sources (CSV and Parquet) using threads.
 //!
 //! DataFusion currently supports simple projection, selection, and aggregate queries.
-
-#![warn(missing_docs)]
-
+//!
+//! [ExecutionContext](execution/context/struct.ExecutionContext.html) is the main interface
+//! for executing queries with DataFusion.
+//!
+//! The following example demonstrates how to use the context to execute a query against a CSV
+//! data source using the DataFrame API:
+//!
+//! ```
+//! # use datafusion::prelude::*;
+//! # use datafusion::error::Result;
+//! # fn main() -> Result<()> {
+//! let mut ctx = ExecutionContext::new();
+//! let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
+//! let df = df.filter(col("a").lt_eq(col("b")))?
+//!            .aggregate(vec![col("a")], vec![df.min(col("b"))?])?
+//!            .limit(100)?;
+//! let results = df.collect();
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! The following example demonstrates how to execute the same query using SQL:
+//!
+//! ```
+//! use datafusion::prelude::*;
+//!
+//! let mut ctx = ExecutionContext::new();
+//! ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).unwrap();
+//! let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").unwrap();
+//! ```
 extern crate arrow;
 extern crate sqlparser;
 
@@ -35,9 +64,8 @@ pub mod error;
 pub mod execution;
 pub mod logicalplan;
 pub mod optimizer;
+pub mod prelude;
 pub mod sql;
 
-pub use execution::context::ExecutionContext;
-
 #[cfg(test)]
 pub mod test;
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/prelude.rs
similarity index 51%
copy from rust/datafusion/src/lib.rs
copy to rust/datafusion/src/prelude.rs
index 73897ee..658f970 100644
--- a/rust/datafusion/src/lib.rs
+++ b/rust/datafusion/src/prelude.rs
@@ -13,31 +13,19 @@
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
-// under the License.
+// under the License.
 
-//! DataFusion is an extensible query execution framework that uses
-//! Apache Arrow as the memory model.
+//! A "prelude" for users of the datafusion crate.
 //!
-//! DataFusion supports both SQL and a Table/DataFrame-style API for building logical query plans
-//! and also provides a query optimizer and execution engine capable of parallel execution
-//! against partitioned data sources (CSV and Parquet) using threads.
+//! Like the standard library's prelude, this module simplifies importing of
+//! common items. Unlike the standard prelude, the contents of this module must
+//! be imported manually:
 //!
-//! DataFusion currently supports simple projection, selection, and aggregate queries.
+//! ```
+//! use datafusion::prelude::*;
+//! ```
 
-#![warn(missing_docs)]
-
-extern crate arrow;
-extern crate sqlparser;
-
-pub mod dataframe;
-pub mod datasource;
-pub mod error;
-pub mod execution;
-pub mod logicalplan;
-pub mod optimizer;
-pub mod sql;
-
-pub use execution::context::ExecutionContext;
-
-#[cfg(test)]
-pub mod test;
+pub use crate::dataframe::DataFrame;
+pub use crate::execution::context::{ExecutionConfig, ExecutionContext};
+pub use crate::execution::physical_plan::csv::CsvReadOptions;
+pub use crate::logicalplan::{col, lit};