You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/28 09:49:13 UTC

[arrow-datafusion] branch master updated: Add TableProvider.statistics method (#3986)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 4ea970d1b Add TableProvider.statistics method (#3986)
4ea970d1b is described below

commit 4ea970d1bda5bdcede31d41f5c4ba7f9a280c659
Author: Andy Grove <an...@gmail.com>
AuthorDate: Fri Oct 28 03:49:06 2022 -0600

    Add TableProvider.statistics method (#3986)
---
 datafusion/common/src/stats.rs               | 6 +++---
 datafusion/core/src/datasource/datasource.rs | 6 ++++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs
index c27f77532..d0f150a31 100644
--- a/datafusion/common/src/stats.rs
+++ b/datafusion/common/src/stats.rs
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module provides an interface for plan level statistics.
+//! This module provides data structures to represent statistics
 
 use crate::ScalarValue;
 
-/// Statistics for a physical plan node
+/// Statistics for a relation
 /// Fields are optional and can be inexact because the sources
 /// sometimes provide approximate estimates for performance reasons
 /// and the transformations output are not always predictable.
@@ -37,7 +37,7 @@ pub struct Statistics {
     pub is_exact: bool,
 }
 
-/// This table statistics are estimates about column
+/// Statistics for a column within a relation
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct ColumnStatistics {
     /// Number of null values on column
diff --git a/datafusion/core/src/datasource/datasource.rs b/datafusion/core/src/datasource/datasource.rs
index 38e3193c4..5b71c077f 100644
--- a/datafusion/core/src/datasource/datasource.rs
+++ b/datafusion/core/src/datasource/datasource.rs
@@ -21,6 +21,7 @@ use std::any::Any;
 use std::sync::Arc;
 
 use async_trait::async_trait;
+use datafusion_common::Statistics;
 use datafusion_expr::LogicalPlan;
 pub use datafusion_expr::{TableProviderFilterPushDown, TableType};
 
@@ -77,6 +78,11 @@ pub trait TableProvider: Sync + Send {
     ) -> Result<TableProviderFilterPushDown> {
         Ok(TableProviderFilterPushDown::Unsupported)
     }
+
+    /// Get statistics for this table, if available
+    fn statistics(&self) -> Option<Statistics> {
+        None
+    }
 }
 
 /// A factory which creates [`TableProvider`]s at runtime given a URL.