You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/12 10:24:30 UTC
[arrow-datafusion] branch main updated: Improve main api doc page, move `avro_to_arrow` to `datasource` (#6564)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b8f90fe936 Improve main api doc page, move `avro_to_arrow` to `datasource` (#6564)
b8f90fe936 is described below
commit b8f90fe9366a7406afbf5bb3f3afe5854adcf26a
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Jun 12 06:24:23 2023 -0400
Improve main api doc page, move `avro_to_arrow` to `datasource` (#6564)
* Improve main api doc page
* fix doc examples
* fmt
---
datafusion/common/src/config.rs | 2 +-
datafusion/core/src/catalog/mod.rs | 3 +--
datafusion/core/src/dataframe.rs | 2 +-
.../{ => datasource}/avro_to_arrow/arrow_array_reader.rs | 2 +-
datafusion/core/src/{ => datasource}/avro_to_arrow/mod.rs | 4 +++-
.../core/src/{ => datasource}/avro_to_arrow/reader.rs | 14 +++++++++-----
.../core/src/{ => datasource}/avro_to_arrow/schema.rs | 0
datafusion/core/src/datasource/file_format/avro.rs | 2 +-
datafusion/core/src/datasource/mod.rs | 5 ++++-
datafusion/core/src/datasource/physical_plan/avro.rs | 8 +++-----
datafusion/core/src/error.rs | 2 +-
datafusion/core/src/lib.rs | 7 ++++---
datafusion/core/src/physical_optimizer/mod.rs | 9 ++++++---
datafusion/core/src/prelude.rs | 2 +-
datafusion/core/src/scalar.rs | 7 ++++---
datafusion/core/src/variable/mod.rs | 2 +-
datafusion/physical-expr/src/var_provider.rs | 2 +-
17 files changed, 42 insertions(+), 31 deletions(-)
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index c5ce3540fc..d71f46a1ce 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! DataFusion Configuration Options
+//! Runtime configuration, via [`ConfigOptions`]
use crate::{DataFusionError, Result};
use std::any::Any;
diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs
index b7843ed66b..7696856b32 100644
--- a/datafusion/core/src/catalog/mod.rs
+++ b/datafusion/core/src/catalog/mod.rs
@@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! This module contains interfaces and default implementations
-//! of table namespacing concepts, including catalogs and schemas.
+//! Interfaces and default implementations of catalogs and schemas.
// TODO(clippy): Having a `catalog::catalog` module path is unclear and ambiguous.
// The parent module should probably be renamed to something that more accurately
diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index 7d0fddcf82..2834fb571f 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! DataFrame API for building and executing query plans.
+//! [`DataFrame`] API for building and executing query plans.
use std::any::Any;
use std::sync::Arc;
diff --git a/datafusion/core/src/avro_to_arrow/arrow_array_reader.rs b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
similarity index 99%
rename from datafusion/core/src/avro_to_arrow/arrow_array_reader.rs
rename to datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
index 311e199f28..0d95065697 100644
--- a/datafusion/core/src/avro_to_arrow/arrow_array_reader.rs
+++ b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
@@ -957,7 +957,7 @@ where
mod test {
use crate::arrow::array::Array;
use crate::arrow::datatypes::{Field, TimeUnit};
- use crate::avro_to_arrow::{Reader, ReaderBuilder};
+ use crate::datasource::avro_to_arrow::{Reader, ReaderBuilder};
use arrow::datatypes::DataType;
use datafusion_common::cast::{
as_int32_array, as_int64_array, as_list_array, as_timestamp_microsecond_array,
diff --git a/datafusion/core/src/avro_to_arrow/mod.rs b/datafusion/core/src/datasource/avro_to_arrow/mod.rs
similarity index 92%
rename from datafusion/core/src/avro_to_arrow/mod.rs
rename to datafusion/core/src/datasource/avro_to_arrow/mod.rs
index 8ca7f22ef3..af0bb86a3e 100644
--- a/datafusion/core/src/avro_to_arrow/mod.rs
+++ b/datafusion/core/src/datasource/avro_to_arrow/mod.rs
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-//! This module contains utilities to manipulate avro metadata.
+//! This module contains code for reading [Avro] data into `RecordBatch`es
+//!
+//! [Avro]: https://avro.apache.org/docs/1.2.0/
#[cfg(feature = "avro")]
mod arrow_array_reader;
diff --git a/datafusion/core/src/avro_to_arrow/reader.rs b/datafusion/core/src/datasource/avro_to_arrow/reader.rs
similarity index 96%
rename from datafusion/core/src/avro_to_arrow/reader.rs
rename to datafusion/core/src/datasource/avro_to_arrow/reader.rs
index c5dab22a2d..5dc53c5c86 100644
--- a/datafusion/core/src/avro_to_arrow/reader.rs
+++ b/datafusion/core/src/datasource/avro_to_arrow/reader.rs
@@ -56,17 +56,21 @@ impl ReaderBuilder {
/// # Example
///
/// ```
- /// extern crate apache_avro;
- ///
/// use std::fs::File;
///
- /// fn example() -> crate::datafusion::avro_to_arrow::Reader<'static, File> {
+ /// use datafusion::datasource::avro_to_arrow::{Reader, ReaderBuilder};
+ ///
+ /// fn example() -> Reader<'static, File> {
/// let file = File::open("test/data/basic.avro").unwrap();
///
/// // create a builder, inferring the schema with the first 100 records
- /// let builder = crate::datafusion::avro_to_arrow::ReaderBuilder::new().read_schema().with_batch_size(100);
+ /// let builder = ReaderBuilder::new()
+ /// .read_schema()
+ /// .with_batch_size(100);
///
- /// let reader = builder.build::<File>(file).unwrap();
+ /// let reader = builder
+ /// .build::<File>(file)
+ /// .unwrap();
///
/// reader
/// }
diff --git a/datafusion/core/src/avro_to_arrow/schema.rs b/datafusion/core/src/datasource/avro_to_arrow/schema.rs
similarity index 100%
rename from datafusion/core/src/avro_to_arrow/schema.rs
rename to datafusion/core/src/datasource/avro_to_arrow/schema.rs
diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs
index ab9f1f5dd0..cfb146e4ff 100644
--- a/datafusion/core/src/datasource/file_format/avro.rs
+++ b/datafusion/core/src/datasource/file_format/avro.rs
@@ -27,7 +27,7 @@ use datafusion_physical_expr::PhysicalExpr;
use object_store::{GetResult, ObjectMeta, ObjectStore};
use super::FileFormat;
-use crate::avro_to_arrow::read_avro_schema_from_reader;
+use crate::datasource::avro_to_arrow::read_avro_schema_from_reader;
use crate::datasource::physical_plan::{AvroExec, FileScanConfig};
use crate::error::Result;
use crate::execution::context::SessionState;
diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs
index 683afb7902..0f9a8c3d73 100644
--- a/datafusion/core/src/datasource/mod.rs
+++ b/datafusion/core/src/datasource/mod.rs
@@ -15,12 +15,15 @@
// specific language governing permissions and limitations
// under the License.
-//! DataFusion data sources
+//! DataFusion data sources: [`TableProvider`] and [`ListingTable`]
+//!
+//! [`ListingTable`]: crate::datasource::listing::ListingTable
// TODO(clippy): Having a `datasource::datasource` module path is unclear and ambiguous.
// The child module should probably be renamed to something that more accurately
// describes its content. Something along the lines of `provider`, or `providers`.
#![allow(clippy::module_inception)]
+pub mod avro_to_arrow;
pub mod datasource;
pub mod default_table_source;
pub mod empty;
diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs
index 704a97ba7e..0c286ba19c 100644
--- a/datafusion/core/src/datasource/physical_plan/avro.rs
+++ b/datafusion/core/src/datasource/physical_plan/avro.rs
@@ -165,6 +165,7 @@ impl ExecutionPlan for AvroExec {
#[cfg(feature = "avro")]
mod private {
use super::*;
+ use crate::datasource::avro_to_arrow::Reader as AvroReader;
use crate::datasource::physical_plan::file_stream::{FileOpenFuture, FileOpener};
use crate::datasource::physical_plan::FileMeta;
use bytes::Buf;
@@ -179,11 +180,8 @@ mod private {
}
impl AvroConfig {
- fn open<R: std::io::Read>(
- &self,
- reader: R,
- ) -> Result<crate::avro_to_arrow::Reader<'static, R>> {
- crate::avro_to_arrow::Reader::try_new(
+ fn open<R: std::io::Read>(&self, reader: R) -> Result<AvroReader<'static, R>> {
+ AvroReader::try_new(
reader,
self.schema.clone(),
self.batch_size,
diff --git a/datafusion/core/src/error.rs b/datafusion/core/src/error.rs
index 0a138c80df..5a5faa7896 100644
--- a/datafusion/core/src/error.rs
+++ b/datafusion/core/src/error.rs
@@ -15,5 +15,5 @@
// specific language governing permissions and limitations
// under the License.
-//! DataFusion error types
+//! DataFusion error type [`DataFusionError`] and [`Result`].
pub use datafusion_common::{DataFusionError, Result, SharedResult};
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index 3e58923c3a..ca44af7339 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -384,11 +384,12 @@
//! and improve compilation times. The crates are:
//!
//! * [datafusion_common]: Common traits and types
-//! * [datafusion_execution]: State needed for execution
//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure
+//! * [datafusion_execution]: State and structures needed for execution
//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
-//! * [datafusion_sql]: [`SqlToRel`] SQL planner
+//! * [datafusion_row]: Row based representation
+//! * [datafusion_sql]: SQL planner ([`SqlToRel`])
//!
//! [sqlparser]: https://docs.rs/sqlparser/latest/sqlparser
//! [`SqlToRel`]: sql::planner::SqlToRel
@@ -412,7 +413,6 @@ pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION");
extern crate core;
extern crate sqlparser;
-pub mod avro_to_arrow;
pub mod catalog;
pub mod dataframe;
pub mod datasource;
@@ -431,6 +431,7 @@ pub use parquet;
// re-export DataFusion crates
pub use datafusion_common as common;
pub use datafusion_common::config;
+pub use datafusion_execution;
pub use datafusion_expr as logical_expr;
pub use datafusion_optimizer as optimizer;
pub use datafusion_physical_expr as physical_expr;
diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs
index b4c019d62b..48a3d6ade7 100644
--- a/datafusion/core/src/physical_optimizer/mod.rs
+++ b/datafusion/core/src/physical_optimizer/mod.rs
@@ -15,9 +15,12 @@
// specific language governing permissions and limitations
// under the License.
-//! This module contains a query optimizer that operates against a physical plan and applies
-//! rules to a physical plan, such as "Repartition".
-
+//! Optimizer that rewrites [`ExecutionPlan`]s.
+//!
+//! These rules take advantage of physical plan properties, such as
+//! "Repartition" or "Sortedness".
+//!
+//! [`ExecutionPlan`]: crate::physical_plan::ExecutionPlan
pub mod aggregate_statistics;
pub mod coalesce_batches;
pub mod combine_partial_final_agg;
diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs
index ed2c81a69f..d01d9c2390 100644
--- a/datafusion/core/src/prelude.rs
+++ b/datafusion/core/src/prelude.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! A "prelude" for users of the datafusion crate.
+//! DataFusion "prelude" to simplify importing common types.
//!
//! Like the standard library's prelude, this module simplifies importing of
//! common items. Unlike the standard prelude, the contents of this module must
diff --git a/datafusion/core/src/scalar.rs b/datafusion/core/src/scalar.rs
index 29f75096ae..c4f0d80616 100644
--- a/datafusion/core/src/scalar.rs
+++ b/datafusion/core/src/scalar.rs
@@ -15,7 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-//! ScalarValue reimported from datafusion-common to easy migration
-//! when datafusion was split into several different crates
-
+//! [`ScalarValue`] single value representation.
+//!
+//! Note this is re-exported from the datafusion-common crate to ease
+//! migration after datafusion was split into several different crates.
pub use datafusion_common::{ScalarType, ScalarValue};
diff --git a/datafusion/core/src/variable/mod.rs b/datafusion/core/src/variable/mod.rs
index 6efa8eb862..5ef165313c 100644
--- a/datafusion/core/src/variable/mod.rs
+++ b/datafusion/core/src/variable/mod.rs
@@ -15,6 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-//! Variable provider
+//! Variable provider for `@name` and `@@name` style runtime values.
pub use datafusion_physical_expr::var_provider::{VarProvider, VarType};
diff --git a/datafusion/physical-expr/src/var_provider.rs b/datafusion/physical-expr/src/var_provider.rs
index faa07665e4..e00cf74072 100644
--- a/datafusion/physical-expr/src/var_provider.rs
+++ b/datafusion/physical-expr/src/var_provider.rs
@@ -29,7 +29,7 @@ pub enum VarType {
UserDefined,
}
-/// A var provider for @variable
+/// A var provider for `@variable` and `@@variable` runtime values.
pub trait VarProvider: std::fmt::Debug {
/// Get variable value
fn get_value(&self, var_names: Vec<String>) -> Result<ScalarValue>;