You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/13 00:33:39 UTC
[arrow-datafusion] branch main updated: Fix Clippy module inception (unwrap `datasource::datasource` and `catalog::catalog` (#6640)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new db70251854 Fix Clippy module inception (unwrap `datasource::datasource` and `catalog::catalog` (#6640)
db70251854 is described below
commit db70251854440b95ed419c5b63a982c8d0fc80b2
Author: Louis Gariépy <68...@users.noreply.github.com>
AuthorDate: Mon Jun 12 20:33:33 2023 -0400
Fix Clippy module inception (unwrap `datasource::datasource` and `catalog::catalog` (#6640)
* Move `catalog::catalog` contents into parent.
* Rename `datasource::datasource` to `datasource::provider`.
* Remove clippy allow.
* Fix `catalog::catalog` references that slipped through.
* Fix doctest.
* Add missing include
---------
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
datafusion-cli/src/catalog.rs | 2 +-
datafusion-examples/examples/catalog.rs | 2 +-
datafusion/core/src/catalog/catalog.rs | 276 ---------------------
datafusion/core/src/catalog/information_schema.rs | 2 +-
datafusion/core/src/catalog/listing_schema.rs | 2 +-
datafusion/core/src/catalog/mod.rs | 263 +++++++++++++++++++-
datafusion/core/src/catalog/schema.rs | 2 +-
datafusion/core/src/datasource/file_format/mod.rs | 2 +-
.../core/src/datasource/listing_table_factory.rs | 2 +-
datafusion/core/src/datasource/mod.rs | 8 +-
.../src/datasource/{datasource.rs => provider.rs} | 0
datafusion/core/src/execution/context.rs | 6 +-
datafusion/core/src/lib.rs | 2 +-
datafusion/core/src/test_util/mod.rs | 2 +-
datafusion/core/tests/provider_filter_pushdown.rs | 2 +-
datafusion/core/tests/sql/information_schema.rs | 2 +-
datafusion/core/tests/sql/order.rs | 2 +-
datafusion/proto/src/logical_plan/mod.rs | 2 +-
18 files changed, 275 insertions(+), 304 deletions(-)
diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs
index dbd6751a4f..d790e3118a 100644
--- a/datafusion-cli/src/catalog.rs
+++ b/datafusion-cli/src/catalog.rs
@@ -16,8 +16,8 @@
// under the License.
use async_trait::async_trait;
-use datafusion::catalog::catalog::{CatalogList, CatalogProvider};
use datafusion::catalog::schema::SchemaProvider;
+use datafusion::catalog::{CatalogList, CatalogProvider};
use datafusion::datasource::listing::{
ListingTable, ListingTableConfig, ListingTableUrl,
};
diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/catalog.rs
index 30cc2c8bd6..5ae510aab2 100644
--- a/datafusion-examples/examples/catalog.rs
+++ b/datafusion-examples/examples/catalog.rs
@@ -23,8 +23,8 @@ use async_trait::async_trait;
use datafusion::{
arrow::util::pretty,
catalog::{
- catalog::{CatalogList, CatalogProvider},
schema::SchemaProvider,
+ {CatalogList, CatalogProvider},
},
datasource::{
file_format::{csv::CsvFormat, parquet::ParquetFormat, FileFormat},
diff --git a/datafusion/core/src/catalog/catalog.rs b/datafusion/core/src/catalog/catalog.rs
deleted file mode 100644
index 393d98dcb8..0000000000
--- a/datafusion/core/src/catalog/catalog.rs
+++ /dev/null
@@ -1,276 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Describes the interface and built-in implementations of catalogs,
-//! representing collections of named schemas.
-
-use crate::catalog::schema::SchemaProvider;
-use dashmap::DashMap;
-use datafusion_common::{DataFusionError, Result};
-use std::any::Any;
-use std::sync::Arc;
-
-/// Represent a list of named catalogs
-pub trait CatalogList: Sync + Send {
- /// Returns the catalog list as [`Any`](std::any::Any)
- /// so that it can be downcast to a specific implementation.
- fn as_any(&self) -> &dyn Any;
-
- /// Adds a new catalog to this catalog list
- /// If a catalog of the same name existed before, it is replaced in the list and returned.
- fn register_catalog(
- &self,
- name: String,
- catalog: Arc<dyn CatalogProvider>,
- ) -> Option<Arc<dyn CatalogProvider>>;
-
- /// Retrieves the list of available catalog names
- fn catalog_names(&self) -> Vec<String>;
-
- /// Retrieves a specific catalog by name, provided it exists.
- fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>;
-}
-
-/// Simple in-memory list of catalogs
-pub struct MemoryCatalogList {
- /// Collection of catalogs containing schemas and ultimately TableProviders
- pub catalogs: DashMap<String, Arc<dyn CatalogProvider>>,
-}
-
-impl MemoryCatalogList {
- /// Instantiates a new `MemoryCatalogList` with an empty collection of catalogs
- pub fn new() -> Self {
- Self {
- catalogs: DashMap::new(),
- }
- }
-}
-
-impl Default for MemoryCatalogList {
- fn default() -> Self {
- Self::new()
- }
-}
-
-impl CatalogList for MemoryCatalogList {
- fn as_any(&self) -> &dyn Any {
- self
- }
-
- fn register_catalog(
- &self,
- name: String,
- catalog: Arc<dyn CatalogProvider>,
- ) -> Option<Arc<dyn CatalogProvider>> {
- self.catalogs.insert(name, catalog)
- }
-
- fn catalog_names(&self) -> Vec<String> {
- self.catalogs.iter().map(|c| c.key().clone()).collect()
- }
-
- fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
- self.catalogs.get(name).map(|c| c.value().clone())
- }
-}
-
-impl Default for MemoryCatalogProvider {
- fn default() -> Self {
- Self::new()
- }
-}
-
-/// Represents a catalog, comprising a number of named schemas.
-pub trait CatalogProvider: Sync + Send {
- /// Returns the catalog provider as [`Any`](std::any::Any)
- /// so that it can be downcast to a specific implementation.
- fn as_any(&self) -> &dyn Any;
-
- /// Retrieves the list of available schema names in this catalog.
- fn schema_names(&self) -> Vec<String>;
-
- /// Retrieves a specific schema from the catalog by name, provided it exists.
- fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>>;
-
- /// Adds a new schema to this catalog.
- ///
- /// If a schema of the same name existed before, it is replaced in
- /// the catalog and returned.
- ///
- /// By default returns a "Not Implemented" error
- fn register_schema(
- &self,
- name: &str,
- schema: Arc<dyn SchemaProvider>,
- ) -> Result<Option<Arc<dyn SchemaProvider>>> {
- // use variables to avoid unused variable warnings
- let _ = name;
- let _ = schema;
- Err(DataFusionError::NotImplemented(
- "Registering new schemas is not supported".to_string(),
- ))
- }
-
- /// Removes a schema from this catalog. Implementations of this method should return
- /// errors if the schema exists but cannot be dropped. For example, in DataFusion's
- /// default in-memory catalog, [`MemoryCatalogProvider`], a non-empty schema
- /// will only be successfully dropped when `cascade` is true.
- /// This is equivalent to how DROP SCHEMA works in PostgreSQL.
- ///
- /// Implementations of this method should return None if schema with `name`
- /// does not exist.
- ///
- /// By default returns a "Not Implemented" error
- fn deregister_schema(
- &self,
- _name: &str,
- _cascade: bool,
- ) -> Result<Option<Arc<dyn SchemaProvider>>> {
- Err(DataFusionError::NotImplemented(
- "Deregistering new schemas is not supported".to_string(),
- ))
- }
-}
-
-/// Simple in-memory implementation of a catalog.
-pub struct MemoryCatalogProvider {
- schemas: DashMap<String, Arc<dyn SchemaProvider>>,
-}
-
-impl MemoryCatalogProvider {
- /// Instantiates a new MemoryCatalogProvider with an empty collection of schemas.
- pub fn new() -> Self {
- Self {
- schemas: DashMap::new(),
- }
- }
-}
-
-impl CatalogProvider for MemoryCatalogProvider {
- fn as_any(&self) -> &dyn Any {
- self
- }
-
- fn schema_names(&self) -> Vec<String> {
- self.schemas.iter().map(|s| s.key().clone()).collect()
- }
-
- fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
- self.schemas.get(name).map(|s| s.value().clone())
- }
-
- fn register_schema(
- &self,
- name: &str,
- schema: Arc<dyn SchemaProvider>,
- ) -> Result<Option<Arc<dyn SchemaProvider>>> {
- Ok(self.schemas.insert(name.into(), schema))
- }
-
- fn deregister_schema(
- &self,
- name: &str,
- cascade: bool,
- ) -> Result<Option<Arc<dyn SchemaProvider>>> {
- if let Some(schema) = self.schema(name) {
- let table_names = schema.table_names();
- match (table_names.is_empty(), cascade) {
- (true, _) | (false, true) => {
- let (_, removed) = self.schemas.remove(name).unwrap();
- Ok(Some(removed))
- }
- (false, false) => Err(DataFusionError::Execution(format!(
- "Cannot drop schema {} because other tables depend on it: {}",
- name,
- itertools::join(table_names.iter(), ", ")
- ))),
- }
- } else {
- Ok(None)
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use crate::catalog::schema::MemorySchemaProvider;
- use crate::datasource::empty::EmptyTable;
- use crate::datasource::TableProvider;
- use arrow::datatypes::Schema;
-
- #[test]
- fn default_register_schema_not_supported() {
- // mimic a new CatalogProvider and ensure it does not support registering schemas
- struct TestProvider {}
- impl CatalogProvider for TestProvider {
- fn as_any(&self) -> &dyn Any {
- self
- }
-
- fn schema_names(&self) -> Vec<String> {
- unimplemented!()
- }
-
- fn schema(&self, _name: &str) -> Option<Arc<dyn SchemaProvider>> {
- unimplemented!()
- }
- }
-
- let schema = Arc::new(MemorySchemaProvider::new()) as _;
- let catalog = Arc::new(TestProvider {});
-
- match catalog.register_schema("foo", schema) {
- Ok(_) => panic!("unexpected OK"),
- Err(e) => assert_eq!(e.to_string(), "This feature is not implemented: Registering new schemas is not supported"),
- };
- }
-
- #[test]
- fn memory_catalog_dereg_nonempty_schema() {
- let cat = Arc::new(MemoryCatalogProvider::new()) as Arc<dyn CatalogProvider>;
-
- let schema = Arc::new(MemorySchemaProvider::new()) as Arc<dyn SchemaProvider>;
- let test_table = Arc::new(EmptyTable::new(Arc::new(Schema::empty())))
- as Arc<dyn TableProvider>;
- schema.register_table("t".into(), test_table).unwrap();
-
- cat.register_schema("foo", schema.clone()).unwrap();
-
- assert!(
- cat.deregister_schema("foo", false).is_err(),
- "dropping empty schema without cascade should error"
- );
- assert!(cat.deregister_schema("foo", true).unwrap().is_some());
- }
-
- #[test]
- fn memory_catalog_dereg_empty_schema() {
- let cat = Arc::new(MemoryCatalogProvider::new()) as Arc<dyn CatalogProvider>;
-
- let schema = Arc::new(MemorySchemaProvider::new()) as Arc<dyn SchemaProvider>;
- cat.register_schema("foo", schema.clone()).unwrap();
-
- assert!(cat.deregister_schema("foo", false).unwrap().is_some());
- }
-
- #[test]
- fn memory_catalog_dereg_missing() {
- let cat = Arc::new(MemoryCatalogProvider::new()) as Arc<dyn CatalogProvider>;
- assert!(cat.deregister_schema("foo", false).unwrap().is_none());
- }
-}
diff --git a/datafusion/core/src/catalog/information_schema.rs b/datafusion/core/src/catalog/information_schema.rs
index d30b490f28..033bb3266e 100644
--- a/datafusion/core/src/catalog/information_schema.rs
+++ b/datafusion/core/src/catalog/information_schema.rs
@@ -36,7 +36,7 @@ use crate::logical_expr::TableType;
use crate::physical_plan::stream::RecordBatchStreamAdapter;
use crate::physical_plan::SendableRecordBatchStream;
-use super::{catalog::CatalogList, schema::SchemaProvider};
+use super::{schema::SchemaProvider, CatalogList};
pub(crate) const INFORMATION_SCHEMA: &str = "information_schema";
pub(crate) const TABLES: &str = "tables";
diff --git a/datafusion/core/src/catalog/listing_schema.rs b/datafusion/core/src/catalog/listing_schema.rs
index cb63659997..e7b4d8dec0 100644
--- a/datafusion/core/src/catalog/listing_schema.rs
+++ b/datafusion/core/src/catalog/listing_schema.rs
@@ -17,7 +17,7 @@
//! listing_schema contains a SchemaProvider that scans ObjectStores for tables automatically
use crate::catalog::schema::SchemaProvider;
-use crate::datasource::datasource::TableProviderFactory;
+use crate::datasource::provider::TableProviderFactory;
use crate::datasource::TableProvider;
use crate::execution::context::SessionState;
use async_trait::async_trait;
diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs
index 7696856b32..591f882bdb 100644
--- a/datafusion/core/src/catalog/mod.rs
+++ b/datafusion/core/src/catalog/mod.rs
@@ -17,14 +17,265 @@
//! Interfaces and default implementations of catalogs and schemas.
-// TODO(clippy): Having a `catalog::catalog` module path is unclear and ambiguous.
-// The parent module should probably be renamed to something that more accurately
-// describes its content. Something along the lines of `database_meta`, `metadata`
-// or `meta`, perhaps?
-#![allow(clippy::module_inception)]
-pub mod catalog;
pub mod information_schema;
pub mod listing_schema;
pub mod schema;
pub use datafusion_sql::{ResolvedTableReference, TableReference};
+
+use crate::catalog::schema::SchemaProvider;
+use dashmap::DashMap;
+use datafusion_common::{DataFusionError, Result};
+use std::any::Any;
+use std::sync::Arc;
+
+/// Represent a list of named catalogs
+pub trait CatalogList: Sync + Send {
+ /// Returns the catalog list as [`Any`](std::any::Any)
+ /// so that it can be downcast to a specific implementation.
+ fn as_any(&self) -> &dyn Any;
+
+ /// Adds a new catalog to this catalog list
+ /// If a catalog of the same name existed before, it is replaced in the list and returned.
+ fn register_catalog(
+ &self,
+ name: String,
+ catalog: Arc<dyn CatalogProvider>,
+ ) -> Option<Arc<dyn CatalogProvider>>;
+
+ /// Retrieves the list of available catalog names
+ fn catalog_names(&self) -> Vec<String>;
+
+ /// Retrieves a specific catalog by name, provided it exists.
+ fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>;
+}
+
+/// Simple in-memory list of catalogs
+pub struct MemoryCatalogList {
+ /// Collection of catalogs containing schemas and ultimately TableProviders
+ pub catalogs: DashMap<String, Arc<dyn CatalogProvider>>,
+}
+
+impl MemoryCatalogList {
+ /// Instantiates a new `MemoryCatalogList` with an empty collection of catalogs
+ pub fn new() -> Self {
+ Self {
+ catalogs: DashMap::new(),
+ }
+ }
+}
+
+impl Default for MemoryCatalogList {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl CatalogList for MemoryCatalogList {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn register_catalog(
+ &self,
+ name: String,
+ catalog: Arc<dyn CatalogProvider>,
+ ) -> Option<Arc<dyn CatalogProvider>> {
+ self.catalogs.insert(name, catalog)
+ }
+
+ fn catalog_names(&self) -> Vec<String> {
+ self.catalogs.iter().map(|c| c.key().clone()).collect()
+ }
+
+ fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
+ self.catalogs.get(name).map(|c| c.value().clone())
+ }
+}
+
+impl Default for MemoryCatalogProvider {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+/// Represents a catalog, comprising a number of named schemas.
+pub trait CatalogProvider: Sync + Send {
+ /// Returns the catalog provider as [`Any`](std::any::Any)
+ /// so that it can be downcast to a specific implementation.
+ fn as_any(&self) -> &dyn Any;
+
+ /// Retrieves the list of available schema names in this catalog.
+ fn schema_names(&self) -> Vec<String>;
+
+ /// Retrieves a specific schema from the catalog by name, provided it exists.
+ fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>>;
+
+ /// Adds a new schema to this catalog.
+ ///
+ /// If a schema of the same name existed before, it is replaced in
+ /// the catalog and returned.
+ ///
+ /// By default returns a "Not Implemented" error
+ fn register_schema(
+ &self,
+ name: &str,
+ schema: Arc<dyn SchemaProvider>,
+ ) -> Result<Option<Arc<dyn SchemaProvider>>> {
+ // use variables to avoid unused variable warnings
+ let _ = name;
+ let _ = schema;
+ Err(DataFusionError::NotImplemented(
+ "Registering new schemas is not supported".to_string(),
+ ))
+ }
+
+ /// Removes a schema from this catalog. Implementations of this method should return
+ /// errors if the schema exists but cannot be dropped. For example, in DataFusion's
+ /// default in-memory catalog, [`MemoryCatalogProvider`], a non-empty schema
+ /// will only be successfully dropped when `cascade` is true.
+ /// This is equivalent to how DROP SCHEMA works in PostgreSQL.
+ ///
+ /// Implementations of this method should return None if schema with `name`
+ /// does not exist.
+ ///
+ /// By default returns a "Not Implemented" error
+ fn deregister_schema(
+ &self,
+ _name: &str,
+ _cascade: bool,
+ ) -> Result<Option<Arc<dyn SchemaProvider>>> {
+ Err(DataFusionError::NotImplemented(
+ "Deregistering new schemas is not supported".to_string(),
+ ))
+ }
+}
+
+/// Simple in-memory implementation of a catalog.
+pub struct MemoryCatalogProvider {
+ schemas: DashMap<String, Arc<dyn SchemaProvider>>,
+}
+
+impl MemoryCatalogProvider {
+ /// Instantiates a new MemoryCatalogProvider with an empty collection of schemas.
+ pub fn new() -> Self {
+ Self {
+ schemas: DashMap::new(),
+ }
+ }
+}
+
+impl CatalogProvider for MemoryCatalogProvider {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn schema_names(&self) -> Vec<String> {
+ self.schemas.iter().map(|s| s.key().clone()).collect()
+ }
+
+ fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
+ self.schemas.get(name).map(|s| s.value().clone())
+ }
+
+ fn register_schema(
+ &self,
+ name: &str,
+ schema: Arc<dyn SchemaProvider>,
+ ) -> Result<Option<Arc<dyn SchemaProvider>>> {
+ Ok(self.schemas.insert(name.into(), schema))
+ }
+
+ fn deregister_schema(
+ &self,
+ name: &str,
+ cascade: bool,
+ ) -> Result<Option<Arc<dyn SchemaProvider>>> {
+ if let Some(schema) = self.schema(name) {
+ let table_names = schema.table_names();
+ match (table_names.is_empty(), cascade) {
+ (true, _) | (false, true) => {
+ let (_, removed) = self.schemas.remove(name).unwrap();
+ Ok(Some(removed))
+ }
+ (false, false) => Err(DataFusionError::Execution(format!(
+ "Cannot drop schema {} because other tables depend on it: {}",
+ name,
+ itertools::join(table_names.iter(), ", ")
+ ))),
+ }
+ } else {
+ Ok(None)
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::catalog::schema::MemorySchemaProvider;
+ use crate::datasource::empty::EmptyTable;
+ use crate::datasource::TableProvider;
+ use arrow::datatypes::Schema;
+
+ #[test]
+ fn default_register_schema_not_supported() {
+ // mimic a new CatalogProvider and ensure it does not support registering schemas
+ struct TestProvider {}
+ impl CatalogProvider for TestProvider {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn schema_names(&self) -> Vec<String> {
+ unimplemented!()
+ }
+
+ fn schema(&self, _name: &str) -> Option<Arc<dyn SchemaProvider>> {
+ unimplemented!()
+ }
+ }
+
+ let schema = Arc::new(MemorySchemaProvider::new()) as _;
+ let catalog = Arc::new(TestProvider {});
+
+ match catalog.register_schema("foo", schema) {
+ Ok(_) => panic!("unexpected OK"),
+ Err(e) => assert_eq!(e.to_string(), "This feature is not implemented: Registering new schemas is not supported"),
+ };
+ }
+
+ #[test]
+ fn memory_catalog_dereg_nonempty_schema() {
+ let cat = Arc::new(MemoryCatalogProvider::new()) as Arc<dyn CatalogProvider>;
+
+ let schema = Arc::new(MemorySchemaProvider::new()) as Arc<dyn SchemaProvider>;
+ let test_table = Arc::new(EmptyTable::new(Arc::new(Schema::empty())))
+ as Arc<dyn TableProvider>;
+ schema.register_table("t".into(), test_table).unwrap();
+
+ cat.register_schema("foo", schema.clone()).unwrap();
+
+ assert!(
+ cat.deregister_schema("foo", false).is_err(),
+ "dropping empty schema without cascade should error"
+ );
+ assert!(cat.deregister_schema("foo", true).unwrap().is_some());
+ }
+
+ #[test]
+ fn memory_catalog_dereg_empty_schema() {
+ let cat = Arc::new(MemoryCatalogProvider::new()) as Arc<dyn CatalogProvider>;
+
+ let schema = Arc::new(MemorySchemaProvider::new()) as Arc<dyn SchemaProvider>;
+ cat.register_schema("foo", schema.clone()).unwrap();
+
+ assert!(cat.deregister_schema("foo", false).unwrap().is_some());
+ }
+
+ #[test]
+ fn memory_catalog_dereg_missing() {
+ let cat = Arc::new(MemoryCatalogProvider::new()) as Arc<dyn CatalogProvider>;
+ assert!(cat.deregister_schema("foo", false).unwrap().is_none());
+ }
+}
diff --git a/datafusion/core/src/catalog/schema.rs b/datafusion/core/src/catalog/schema.rs
index 9d3b47546e..552dccc41a 100644
--- a/datafusion/core/src/catalog/schema.rs
+++ b/datafusion/core/src/catalog/schema.rs
@@ -133,8 +133,8 @@ mod tests {
use arrow::datatypes::Schema;
use crate::assert_batches_eq;
- use crate::catalog::catalog::{CatalogProvider, MemoryCatalogProvider};
use crate::catalog::schema::{MemorySchemaProvider, SchemaProvider};
+ use crate::catalog::{CatalogProvider, MemoryCatalogProvider};
use crate::datasource::empty::EmptyTable;
use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl};
use crate::prelude::SessionContext;
diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs
index a6848b0d12..4cc6e8706a 100644
--- a/datafusion/core/src/datasource/file_format/mod.rs
+++ b/datafusion/core/src/datasource/file_format/mod.rs
@@ -57,7 +57,7 @@ use tokio::io::AsyncWrite;
/// from the [`TableProvider`]. This helps code re-utilization across
/// providers that support the the same file formats.
///
-/// [`TableProvider`]: crate::datasource::datasource::TableProvider
+/// [`TableProvider`]: crate::datasource::provider::TableProvider
#[async_trait]
pub trait FileFormat: Send + Sync + fmt::Debug {
/// Returns the table provider as [`Any`](std::any::Any) so that it can be
diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 4bc6c12415..fdd696172b 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -26,7 +26,6 @@ use async_trait::async_trait;
use datafusion_common::DataFusionError;
use datafusion_expr::CreateExternalTable;
-use crate::datasource::datasource::TableProviderFactory;
use crate::datasource::file_format::arrow::ArrowFormat;
use crate::datasource::file_format::avro::AvroFormat;
use crate::datasource::file_format::csv::CsvFormat;
@@ -37,6 +36,7 @@ use crate::datasource::file_format::FileFormat;
use crate::datasource::listing::{
ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
};
+use crate::datasource::provider::TableProviderFactory;
use crate::datasource::TableProvider;
use crate::execution::context::SessionState;
diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs
index 0f9a8c3d73..169a17d090 100644
--- a/datafusion/core/src/datasource/mod.rs
+++ b/datafusion/core/src/datasource/mod.rs
@@ -19,12 +19,7 @@
//!
//! [`ListingTable`]: crate::datasource::listing::ListingTable
-// TODO(clippy): Having a `datasource::datasource` module path is unclear and ambiguous.
-// The child module should probably be renamed to something that more accurately
-// describes its content. Something along the lines of `provider`, or `providers`.
-#![allow(clippy::module_inception)]
pub mod avro_to_arrow;
-pub mod datasource;
pub mod default_table_source;
pub mod empty;
pub mod file_format;
@@ -32,6 +27,7 @@ pub mod listing;
pub mod listing_table_factory;
pub mod memory;
pub mod physical_plan;
+pub mod provider;
pub mod streaming;
pub mod view;
@@ -40,12 +36,12 @@ pub use datafusion_execution::object_store;
use futures::Stream;
-pub use self::datasource::TableProvider;
pub use self::default_table_source::{
provider_as_source, source_as_provider, DefaultTableSource,
};
use self::listing::PartitionedFile;
pub use self::memory::MemTable;
+pub use self::provider::TableProvider;
pub use self::view::ViewTable;
use crate::arrow::datatypes::{Schema, SchemaRef};
use crate::error::Result;
diff --git a/datafusion/core/src/datasource/datasource.rs b/datafusion/core/src/datasource/provider.rs
similarity index 100%
rename from datafusion/core/src/datasource/datasource.rs
rename to datafusion/core/src/datasource/provider.rs
diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs
index a4f7ba31fb..5dc8a97544 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -17,11 +17,11 @@
//! [`SessionContext`] contains methods for registering data sources and executing queries
use crate::{
- catalog::catalog::{CatalogList, MemoryCatalogList},
+ catalog::{CatalogList, MemoryCatalogList},
datasource::{
- datasource::TableProviderFactory,
listing::{ListingOptions, ListingTable},
listing_table_factory::ListingTableFactory,
+ provider::TableProviderFactory,
},
datasource::{MemTable, ViewTable},
logical_expr::{PlanType, ToStringifiedPlan},
@@ -52,8 +52,8 @@ use arrow::{
};
use crate::catalog::{
- catalog::{CatalogProvider, MemoryCatalogProvider},
schema::{MemorySchemaProvider, SchemaProvider},
+ {CatalogProvider, MemoryCatalogProvider},
};
use crate::dataframe::DataFrame;
use crate::datasource::{
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index ca44af7339..71bd866de5 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -145,7 +145,7 @@
//! You can find examples of each of them in the [datafusion-examples] directory.
//!
//! [`TableProvider`]: crate::datasource::TableProvider
-//! [`CatalogProvider`]: crate::catalog::catalog::CatalogProvider
+//! [`CatalogProvider`]: crate::catalog::CatalogProvider
//! [`LogicalPlanBuilder`]: datafusion_expr::logical_plan::builder::LogicalPlanBuilder
//! [`ScalarUDF`]: physical_plan::udf::ScalarUDF
//! [`AggregateUDF`]: physical_plan::udaf::AggregateUDF
diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs
index 993ca9c186..01d502f29f 100644
--- a/datafusion/core/src/test_util/mod.rs
+++ b/datafusion/core/src/test_util/mod.rs
@@ -26,7 +26,7 @@ use std::pin::Pin;
use std::task::{Context, Poll};
use std::{env, error::Error, path::PathBuf, sync::Arc};
-use crate::datasource::datasource::TableProviderFactory;
+use crate::datasource::provider::TableProviderFactory;
use crate::datasource::{empty::EmptyTable, provider_as_source, TableProvider};
use crate::error::Result;
use crate::execution::context::{SessionState, TaskContext};
diff --git a/datafusion/core/tests/provider_filter_pushdown.rs b/datafusion/core/tests/provider_filter_pushdown.rs
index ac1eef850d..a809832387 100644
--- a/datafusion/core/tests/provider_filter_pushdown.rs
+++ b/datafusion/core/tests/provider_filter_pushdown.rs
@@ -19,7 +19,7 @@ use arrow::array::{Int32Builder, Int64Array};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use arrow::record_batch::RecordBatch;
use async_trait::async_trait;
-use datafusion::datasource::datasource::{TableProvider, TableType};
+use datafusion::datasource::provider::{TableProvider, TableType};
use datafusion::error::Result;
use datafusion::execution::context::{SessionContext, SessionState, TaskContext};
use datafusion::logical_expr::{Expr, TableProviderFilterPushDown};
diff --git a/datafusion/core/tests/sql/information_schema.rs b/datafusion/core/tests/sql/information_schema.rs
index 68ac6c5d62..1cb5180991 100644
--- a/datafusion/core/tests/sql/information_schema.rs
+++ b/datafusion/core/tests/sql/information_schema.rs
@@ -19,8 +19,8 @@ use async_trait::async_trait;
use datafusion::execution::context::SessionState;
use datafusion::{
catalog::{
- catalog::{CatalogProvider, MemoryCatalogProvider},
schema::{MemorySchemaProvider, SchemaProvider},
+ CatalogProvider, MemoryCatalogProvider,
},
datasource::{TableProvider, TableType},
};
diff --git a/datafusion/core/tests/sql/order.rs b/datafusion/core/tests/sql/order.rs
index 100e5b8c44..e1a8221ecc 100644
--- a/datafusion/core/tests/sql/order.rs
+++ b/datafusion/core/tests/sql/order.rs
@@ -16,9 +16,9 @@
// under the License.
use super::*;
-use datafusion::datasource::datasource::TableProviderFactory;
use datafusion::datasource::listing::ListingTable;
use datafusion::datasource::listing_table_factory::ListingTableFactory;
+use datafusion::datasource::provider::TableProviderFactory;
use datafusion_expr::logical_plan::DdlStatement;
use test_utils::{batches_to_vec, partitions_to_sorted_vec};
diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs
index 646d02384d..51f60dcd5f 100644
--- a/datafusion/proto/src/logical_plan/mod.rs
+++ b/datafusion/proto/src/logical_plan/mod.rs
@@ -1439,7 +1439,7 @@ mod roundtrip_tests {
TimeUnit, UnionMode,
},
};
- use datafusion::datasource::datasource::TableProviderFactory;
+ use datafusion::datasource::provider::TableProviderFactory;
use datafusion::datasource::TableProvider;
use datafusion::execution::context::SessionState;
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};