You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/08/17 03:22:09 UTC
[arrow] branch master updated: ARROW-9757: [Rust] [DataFusion] Add prelude.rs
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 4d836ef ARROW-9757: [Rust] [DataFusion] Add prelude.rs
4d836ef is described below
commit 4d836ef4b044b8b15ed43fbceae387b0f26bda48
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sun Aug 16 21:21:20 2020 -0600
ARROW-9757: [Rust] [DataFusion] Add prelude.rs
Users can now just add `use datafusion::prelude::*;` to bring in some key items such as `ExecutionContext` to make it easier to get started.
Rustdocs updated and improved as well.
Closes #7978 from andygrove/prelude
Authored-by: Andy Grove <an...@gmail.com>
Signed-off-by: Andy Grove <an...@gmail.com>
---
rust/datafusion/examples/csv_sql.rs | 3 +--
rust/datafusion/examples/dataframe.rs | 4 +--
rust/datafusion/examples/flight_server.rs | 2 +-
rust/datafusion/examples/memory_table_api.rs | 3 +--
rust/datafusion/examples/parquet_sql.rs | 3 ++-
rust/datafusion/src/dataframe.rs | 35 +++++++-----------------
rust/datafusion/src/execution/context.rs | 31 +++++++++++----------
rust/datafusion/src/lib.rs | 40 +++++++++++++++++++++++-----
rust/datafusion/src/{lib.rs => prelude.rs} | 36 +++++++++----------------
9 files changed, 79 insertions(+), 78 deletions(-)
diff --git a/rust/datafusion/examples/csv_sql.rs b/rust/datafusion/examples/csv_sql.rs
index a5f3837..771d99b 100644
--- a/rust/datafusion/examples/csv_sql.rs
+++ b/rust/datafusion/examples/csv_sql.rs
@@ -17,9 +17,8 @@
use arrow::util::pretty;
-use datafusion::datasource::csv::CsvReadOptions;
use datafusion::error::Result;
-use datafusion::ExecutionContext;
+use datafusion::prelude::*;
/// This example demonstrates executing a simple query against an Arrow data source (CSV) and
/// fetching results
diff --git a/rust/datafusion/examples/dataframe.rs b/rust/datafusion/examples/dataframe.rs
index 4b931b6..5b45d11 100644
--- a/rust/datafusion/examples/dataframe.rs
+++ b/rust/datafusion/examples/dataframe.rs
@@ -16,9 +16,9 @@
// under the License.
use arrow::util::pretty;
+
use datafusion::error::Result;
-use datafusion::logicalplan::{col, lit};
-use datafusion::ExecutionContext;
+use datafusion::prelude::*;
/// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
/// fetching results, using the DataFrame trait
diff --git a/rust/datafusion/examples/flight_server.rs b/rust/datafusion/examples/flight_server.rs
index c71a758..0cb2858 100644
--- a/rust/datafusion/examples/flight_server.rs
+++ b/rust/datafusion/examples/flight_server.rs
@@ -23,7 +23,7 @@ use tonic::{Request, Response, Status, Streaming};
use datafusion::datasource::parquet::ParquetTable;
use datafusion::datasource::TableProvider;
-use datafusion::execution::context::ExecutionContext;
+use datafusion::prelude::*;
use arrow_flight::{
flight_service_server::FlightService, flight_service_server::FlightServiceServer,
diff --git a/rust/datafusion/examples/memory_table_api.rs b/rust/datafusion/examples/memory_table_api.rs
index ee85785..da94761 100644
--- a/rust/datafusion/examples/memory_table_api.rs
+++ b/rust/datafusion/examples/memory_table_api.rs
@@ -25,8 +25,7 @@ use arrow::util::pretty;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
-use datafusion::execution::context::ExecutionContext;
-use datafusion::logicalplan::{col, lit};
+use datafusion::prelude::*;
/// This example demonstrates basic uses of the Table API on an in-memory table
fn main() -> Result<()> {
diff --git a/rust/datafusion/examples/parquet_sql.rs b/rust/datafusion/examples/parquet_sql.rs
index f73a2ae..6359023 100644
--- a/rust/datafusion/examples/parquet_sql.rs
+++ b/rust/datafusion/examples/parquet_sql.rs
@@ -16,8 +16,9 @@
// under the License.
use arrow::util::pretty;
+
use datafusion::error::Result;
-use datafusion::execution::context::ExecutionContext;
+use datafusion::prelude::*;
/// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
/// fetching results
diff --git a/rust/datafusion/src/dataframe.rs b/rust/datafusion/src/dataframe.rs
index 75618d1..f89f06c 100644
--- a/rust/datafusion/src/dataframe.rs
+++ b/rust/datafusion/src/dataframe.rs
@@ -35,10 +35,8 @@ use std::sync::Arc;
/// The query can be executed by calling the `collect` method.
///
/// ```
-/// # use datafusion::ExecutionContext;
+/// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
-/// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
-/// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -54,9 +52,8 @@ pub trait DataFrame {
/// specified columns.
///
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -69,10 +66,8 @@ pub trait DataFrame {
/// Create a projection based on arbitrary expressions.
///
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
- /// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -85,10 +80,8 @@ pub trait DataFrame {
/// Filter a DataFrame to only include rows that match the specified filter expression.
///
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
- /// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -101,10 +94,8 @@ pub trait DataFrame {
/// Perform an aggregate query with optional grouping expressions.
///
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
- /// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -126,10 +117,8 @@ pub trait DataFrame {
/// Limit the number of rows returned from this DataFrame.
///
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
- /// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -143,10 +132,8 @@ pub trait DataFrame {
/// a sort expression by calling its [sort](../logicalplan/enum.Expr.html#method.sort) method.
///
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
- /// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -159,10 +146,8 @@ pub trait DataFrame {
/// Executes this DataFrame and collects all results into a vector of RecordBatch.
///
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
- /// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
@@ -176,10 +161,8 @@ pub trait DataFrame {
/// where each column has a name, data type, and nullability attribute.
/// ```
- /// # use datafusion::ExecutionContext;
+ /// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
- /// # use datafusion::execution::physical_plan::csv::CsvReadOptions;
- /// # use datafusion::logicalplan::col;
/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs
index 8f92aae..bafdf74 100644
--- a/rust/datafusion/src/execution/context.rs
+++ b/rust/datafusion/src/execution/context.rs
@@ -60,31 +60,34 @@ use crate::sql::{
/// * Executing a SQL query
///
/// The following example demonstrates how to use the context to execute a query against a CSV
-/// data source:
+/// data source using the DataFrame API:
///
/// ```
-/// use datafusion::ExecutionContext;
-/// use datafusion::execution::physical_plan::csv::CsvReadOptions;
-/// use datafusion::logicalplan::col;
-///
+/// use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
-/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).unwrap();
-/// let df = df.filter(col("a").lt_eq(col("b"))).unwrap()
-/// .aggregate(vec![col("a")], vec![df.min(col("b")).unwrap()]).unwrap()
-/// .limit(100).unwrap();
+/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
+/// let df = df.filter(col("a").lt_eq(col("b")))?
+/// .aggregate(vec![col("a")], vec![df.min(col("b"))?])?
+/// .limit(100)?;
/// let results = df.collect();
+/// # Ok(())
+/// # }
/// ```
///
/// The following example demonstrates how to execute the same query using SQL:
///
/// ```
-/// use datafusion::ExecutionContext;
-/// use datafusion::execution::physical_plan::csv::CsvReadOptions;
-/// use datafusion::logicalplan::col;
+/// use datafusion::prelude::*;
///
+/// # use datafusion::error::Result;
+/// # fn main() -> Result<()> {
/// let mut ctx = ExecutionContext::new();
-/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).unwrap();
-/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").unwrap();
+/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;
+/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;
+/// # Ok(())
+/// # }
/// ```
pub struct ExecutionContext {
/// Internal state for the context
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs
index 73897ee..0b30f47 100644
--- a/rust/datafusion/src/lib.rs
+++ b/rust/datafusion/src/lib.rs
@@ -15,17 +15,46 @@
// specific language governing permissions and limitations
// under the License.
+#![warn(missing_docs)]
+
//! DataFusion is an extensible query execution framework that uses
//! Apache Arrow as the memory model.
//!
-//! DataFusion supports both SQL and a Table/DataFrame-style API for building logical query plans
+//! DataFusion supports both SQL and a DataFrame API for building logical query plans
//! and also provides a query optimizer and execution engine capable of parallel execution
//! against partitioned data sources (CSV and Parquet) using threads.
//!
//! DataFusion currently supports simple projection, selection, and aggregate queries.
-
-#![warn(missing_docs)]
-
+//!
+/// [ExecutionContext](../execution/context/struct.ExecutionContext.html) is the main interface
+/// for executing queries with DataFusion.
+///
+/// The following example demonstrates how to use the context to execute a query against a CSV
+/// data source using the DataFrame API:
+///
+/// ```
+/// # use datafusion::prelude::*;
+/// # use datafusion::error::Result;
+/// # fn main() -> Result<()> {
+/// let mut ctx = ExecutionContext::new();
+/// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
+/// let df = df.filter(col("a").lt_eq(col("b")))?
+/// .aggregate(vec![col("a")], vec![df.min(col("b"))?])?
+/// .limit(100)?;
+/// let results = df.collect();
+/// # Ok(())
+/// # }
+/// ```
+///
+/// The following example demonstrates how to execute the same query using SQL:
+///
+/// ```
+/// use datafusion::prelude::*;
+///
+/// let mut ctx = ExecutionContext::new();
+/// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).unwrap();
+/// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").unwrap();
+/// ```
extern crate arrow;
extern crate sqlparser;
@@ -35,9 +64,8 @@ pub mod error;
pub mod execution;
pub mod logicalplan;
pub mod optimizer;
+pub mod prelude;
pub mod sql;
-pub use execution::context::ExecutionContext;
-
#[cfg(test)]
pub mod test;
diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/prelude.rs
similarity index 51%
copy from rust/datafusion/src/lib.rs
copy to rust/datafusion/src/prelude.rs
index 73897ee..658f970 100644
--- a/rust/datafusion/src/lib.rs
+++ b/rust/datafusion/src/prelude.rs
@@ -13,31 +13,19 @@
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
-// under the License.
+// under the License.
-//! DataFusion is an extensible query execution framework that uses
-//! Apache Arrow as the memory model.
+//! A "prelude" for users of the datafusion crate.
//!
-//! DataFusion supports both SQL and a Table/DataFrame-style API for building logical query plans
-//! and also provides a query optimizer and execution engine capable of parallel execution
-//! against partitioned data sources (CSV and Parquet) using threads.
+//! Like the standard library's prelude, this module simplifies importing of
+//! common items. Unlike the standard prelude, the contents of this module must
+//! be imported manually:
//!
-//! DataFusion currently supports simple projection, selection, and aggregate queries.
+//! ```
+//! use datafusion::prelude::*;
+//! ```
-#![warn(missing_docs)]
-
-extern crate arrow;
-extern crate sqlparser;
-
-pub mod dataframe;
-pub mod datasource;
-pub mod error;
-pub mod execution;
-pub mod logicalplan;
-pub mod optimizer;
-pub mod sql;
-
-pub use execution::context::ExecutionContext;
-
-#[cfg(test)]
-pub mod test;
+pub use crate::dataframe::DataFrame;
+pub use crate::execution::context::{ExecutionConfig, ExecutionContext};
+pub use crate::execution::physical_plan::csv::CsvReadOptions;
+pub use crate::logicalplan::{col, lit};