You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/12 10:24:30 UTC

[arrow-datafusion] branch main updated: Improve main api doc page, move `avro_to_arrow` to `datasource` (#6564)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new b8f90fe936 Improve main api doc page, move `avro_to_arrow` to `datasource` (#6564)
b8f90fe936 is described below

commit b8f90fe9366a7406afbf5bb3f3afe5854adcf26a
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Jun 12 06:24:23 2023 -0400

    Improve main api doc page, move `avro_to_arrow` to `datasource` (#6564)
    
    * Improve main api doc page
    
    * fix doc examples
    
    * fmt
---
 datafusion/common/src/config.rs                            |  2 +-
 datafusion/core/src/catalog/mod.rs                         |  3 +--
 datafusion/core/src/dataframe.rs                           |  2 +-
 .../{ => datasource}/avro_to_arrow/arrow_array_reader.rs   |  2 +-
 datafusion/core/src/{ => datasource}/avro_to_arrow/mod.rs  |  4 +++-
 .../core/src/{ => datasource}/avro_to_arrow/reader.rs      | 14 +++++++++-----
 .../core/src/{ => datasource}/avro_to_arrow/schema.rs      |  0
 datafusion/core/src/datasource/file_format/avro.rs         |  2 +-
 datafusion/core/src/datasource/mod.rs                      |  5 ++++-
 datafusion/core/src/datasource/physical_plan/avro.rs       |  8 +++-----
 datafusion/core/src/error.rs                               |  2 +-
 datafusion/core/src/lib.rs                                 |  7 ++++---
 datafusion/core/src/physical_optimizer/mod.rs              |  9 ++++++---
 datafusion/core/src/prelude.rs                             |  2 +-
 datafusion/core/src/scalar.rs                              |  7 ++++---
 datafusion/core/src/variable/mod.rs                        |  2 +-
 datafusion/physical-expr/src/var_provider.rs               |  2 +-
 17 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index c5ce3540fc..d71f46a1ce 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! DataFusion Configuration Options
+//! Runtime configuration, via [`ConfigOptions`]
 
 use crate::{DataFusionError, Result};
 use std::any::Any;
diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs
index b7843ed66b..7696856b32 100644
--- a/datafusion/core/src/catalog/mod.rs
+++ b/datafusion/core/src/catalog/mod.rs
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module contains interfaces and default implementations
-//! of table namespacing concepts, including catalogs and schemas.
+//! Interfaces and default implementations of catalogs and schemas.
 
 // TODO(clippy): Having a `catalog::catalog` module path is unclear and ambiguous.
 // The parent module should probably be renamed to something that more accurately
diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index 7d0fddcf82..2834fb571f 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! DataFrame API for building and executing query plans.
+//! [`DataFrame`] API for building and executing query plans.
 
 use std::any::Any;
 use std::sync::Arc;
diff --git a/datafusion/core/src/avro_to_arrow/arrow_array_reader.rs b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
similarity index 99%
rename from datafusion/core/src/avro_to_arrow/arrow_array_reader.rs
rename to datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
index 311e199f28..0d95065697 100644
--- a/datafusion/core/src/avro_to_arrow/arrow_array_reader.rs
+++ b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs
@@ -957,7 +957,7 @@ where
 mod test {
     use crate::arrow::array::Array;
     use crate::arrow::datatypes::{Field, TimeUnit};
-    use crate::avro_to_arrow::{Reader, ReaderBuilder};
+    use crate::datasource::avro_to_arrow::{Reader, ReaderBuilder};
     use arrow::datatypes::DataType;
     use datafusion_common::cast::{
         as_int32_array, as_int64_array, as_list_array, as_timestamp_microsecond_array,
diff --git a/datafusion/core/src/avro_to_arrow/mod.rs b/datafusion/core/src/datasource/avro_to_arrow/mod.rs
similarity index 92%
rename from datafusion/core/src/avro_to_arrow/mod.rs
rename to datafusion/core/src/datasource/avro_to_arrow/mod.rs
index 8ca7f22ef3..af0bb86a3e 100644
--- a/datafusion/core/src/avro_to_arrow/mod.rs
+++ b/datafusion/core/src/datasource/avro_to_arrow/mod.rs
@@ -15,7 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module contains utilities to manipulate avro metadata.
+//! This module contains code for reading [Avro] data into `RecordBatch`es
+//!
+//! [Avro]: https://avro.apache.org/docs/1.2.0/
 
 #[cfg(feature = "avro")]
 mod arrow_array_reader;
diff --git a/datafusion/core/src/avro_to_arrow/reader.rs b/datafusion/core/src/datasource/avro_to_arrow/reader.rs
similarity index 96%
rename from datafusion/core/src/avro_to_arrow/reader.rs
rename to datafusion/core/src/datasource/avro_to_arrow/reader.rs
index c5dab22a2d..5dc53c5c86 100644
--- a/datafusion/core/src/avro_to_arrow/reader.rs
+++ b/datafusion/core/src/datasource/avro_to_arrow/reader.rs
@@ -56,17 +56,21 @@ impl ReaderBuilder {
     /// # Example
     ///
     /// ```
-    /// extern crate apache_avro;
-    ///
     /// use std::fs::File;
     ///
-    /// fn example() -> crate::datafusion::avro_to_arrow::Reader<'static, File> {
+    /// use datafusion::datasource::avro_to_arrow::{Reader, ReaderBuilder};
+    ///
+    /// fn example() -> Reader<'static, File> {
     ///     let file = File::open("test/data/basic.avro").unwrap();
     ///
     ///     // create a builder, inferring the schema with the first 100 records
-    ///     let builder = crate::datafusion::avro_to_arrow::ReaderBuilder::new().read_schema().with_batch_size(100);
+    ///     let builder = ReaderBuilder::new()
+    ///       .read_schema()
+    ///       .with_batch_size(100);
     ///
-    ///     let reader = builder.build::<File>(file).unwrap();
+    ///     let reader = builder
+    ///       .build::<File>(file)
+    ///       .unwrap();
     ///
     ///     reader
     /// }
diff --git a/datafusion/core/src/avro_to_arrow/schema.rs b/datafusion/core/src/datasource/avro_to_arrow/schema.rs
similarity index 100%
rename from datafusion/core/src/avro_to_arrow/schema.rs
rename to datafusion/core/src/datasource/avro_to_arrow/schema.rs
diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs
index ab9f1f5dd0..cfb146e4ff 100644
--- a/datafusion/core/src/datasource/file_format/avro.rs
+++ b/datafusion/core/src/datasource/file_format/avro.rs
@@ -27,7 +27,7 @@ use datafusion_physical_expr::PhysicalExpr;
 use object_store::{GetResult, ObjectMeta, ObjectStore};
 
 use super::FileFormat;
-use crate::avro_to_arrow::read_avro_schema_from_reader;
+use crate::datasource::avro_to_arrow::read_avro_schema_from_reader;
 use crate::datasource::physical_plan::{AvroExec, FileScanConfig};
 use crate::error::Result;
 use crate::execution::context::SessionState;
diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs
index 683afb7902..0f9a8c3d73 100644
--- a/datafusion/core/src/datasource/mod.rs
+++ b/datafusion/core/src/datasource/mod.rs
@@ -15,12 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! DataFusion data sources
+//! DataFusion data sources: [`TableProvider`] and [`ListingTable`]
+//!
+//! [`ListingTable`]: crate::datasource::listing::ListingTable
 
 // TODO(clippy): Having a `datasource::datasource` module path is unclear and ambiguous.
 // The child module should probably be renamed to something that more accurately
 // describes its content. Something along the lines of `provider`, or `providers`.
 #![allow(clippy::module_inception)]
+pub mod avro_to_arrow;
 pub mod datasource;
 pub mod default_table_source;
 pub mod empty;
diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs
index 704a97ba7e..0c286ba19c 100644
--- a/datafusion/core/src/datasource/physical_plan/avro.rs
+++ b/datafusion/core/src/datasource/physical_plan/avro.rs
@@ -165,6 +165,7 @@ impl ExecutionPlan for AvroExec {
 #[cfg(feature = "avro")]
 mod private {
     use super::*;
+    use crate::datasource::avro_to_arrow::Reader as AvroReader;
     use crate::datasource::physical_plan::file_stream::{FileOpenFuture, FileOpener};
     use crate::datasource::physical_plan::FileMeta;
     use bytes::Buf;
@@ -179,11 +180,8 @@ mod private {
     }
 
     impl AvroConfig {
-        fn open<R: std::io::Read>(
-            &self,
-            reader: R,
-        ) -> Result<crate::avro_to_arrow::Reader<'static, R>> {
-            crate::avro_to_arrow::Reader::try_new(
+        fn open<R: std::io::Read>(&self, reader: R) -> Result<AvroReader<'static, R>> {
+            AvroReader::try_new(
                 reader,
                 self.schema.clone(),
                 self.batch_size,
diff --git a/datafusion/core/src/error.rs b/datafusion/core/src/error.rs
index 0a138c80df..5a5faa7896 100644
--- a/datafusion/core/src/error.rs
+++ b/datafusion/core/src/error.rs
@@ -15,5 +15,5 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! DataFusion error types
+//! DataFusion error type [`DataFusionError`] and [`Result`].
 pub use datafusion_common::{DataFusionError, Result, SharedResult};
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index 3e58923c3a..ca44af7339 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -384,11 +384,12 @@
 //! and improve compilation times. The crates are:
 //!
 //! * [datafusion_common]: Common traits and types
-//! * [datafusion_execution]: State needed for execution
 //! * [datafusion_expr]: [`LogicalPlan`],  [`Expr`] and related logical planning structure
+//! * [datafusion_execution]: State and structures needed for execution
 //! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
 //! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
-//! * [datafusion_sql]:  [`SqlToRel`] SQL planner
+//! * [datafusion_row]: Row based representation
+//! * [datafusion_sql]: SQL planner ([`SqlToRel`])
 //!
 //! [sqlparser]: https://docs.rs/sqlparser/latest/sqlparser
 //! [`SqlToRel`]: sql::planner::SqlToRel
@@ -412,7 +413,6 @@ pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION");
 extern crate core;
 extern crate sqlparser;
 
-pub mod avro_to_arrow;
 pub mod catalog;
 pub mod dataframe;
 pub mod datasource;
@@ -431,6 +431,7 @@ pub use parquet;
 // re-export DataFusion crates
 pub use datafusion_common as common;
 pub use datafusion_common::config;
+pub use datafusion_execution;
 pub use datafusion_expr as logical_expr;
 pub use datafusion_optimizer as optimizer;
 pub use datafusion_physical_expr as physical_expr;
diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs
index b4c019d62b..48a3d6ade7 100644
--- a/datafusion/core/src/physical_optimizer/mod.rs
+++ b/datafusion/core/src/physical_optimizer/mod.rs
@@ -15,9 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module contains a query optimizer that operates against a physical plan and applies
-//! rules to a physical plan, such as "Repartition".
-
+//! Optimizer that rewrites [`ExecutionPlan`]s.
+//!
+//! These rules take advantage of physical plan properties, such as
+//! "Repartition" or "Sortedness".
+//!
+//! [`ExecutionPlan`]: crate::physical_plan::ExecutionPlan
 pub mod aggregate_statistics;
 pub mod coalesce_batches;
 pub mod combine_partial_final_agg;
diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs
index ed2c81a69f..d01d9c2390 100644
--- a/datafusion/core/src/prelude.rs
+++ b/datafusion/core/src/prelude.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.pub},
 
-//! A "prelude" for users of the datafusion crate.
+//! DataFusion "prelude" to simplify importing common types.
 //!
 //! Like the standard library's prelude, this module simplifies importing of
 //! common items. Unlike the standard prelude, the contents of this module must
diff --git a/datafusion/core/src/scalar.rs b/datafusion/core/src/scalar.rs
index 29f75096ae..c4f0d80616 100644
--- a/datafusion/core/src/scalar.rs
+++ b/datafusion/core/src/scalar.rs
@@ -15,7 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! ScalarValue reimported from datafusion-common to easy migration
-//! when datafusion was split into several different crates
-
+//! [`ScalarValue`] single value representation.
+//!
+//! Note this is re-exported from the datafusion-common crate to ease
+//! migration after datafusion was split into several different crates.
 pub use datafusion_common::{ScalarType, ScalarValue};
diff --git a/datafusion/core/src/variable/mod.rs b/datafusion/core/src/variable/mod.rs
index 6efa8eb862..5ef165313c 100644
--- a/datafusion/core/src/variable/mod.rs
+++ b/datafusion/core/src/variable/mod.rs
@@ -15,6 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Variable provider
+//! Variable provider for `@name` and `@@name` style runtime values.
 
 pub use datafusion_physical_expr::var_provider::{VarProvider, VarType};
diff --git a/datafusion/physical-expr/src/var_provider.rs b/datafusion/physical-expr/src/var_provider.rs
index faa07665e4..e00cf74072 100644
--- a/datafusion/physical-expr/src/var_provider.rs
+++ b/datafusion/physical-expr/src/var_provider.rs
@@ -29,7 +29,7 @@ pub enum VarType {
     UserDefined,
 }
 
-/// A var provider for @variable
+/// A var provider for `@variable` and `@@variable` runtime values.
 pub trait VarProvider: std::fmt::Debug {
     /// Get variable value
     fn get_value(&self, var_names: Vec<String>) -> Result<ScalarValue>;