You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by nj...@apache.org on 2022/12/14 01:45:47 UTC

[arrow-datafusion] branch master updated: Normalize datafusion configuration names (#4596)

This is an automated email from the ASF dual-hosted git repository.

nju_yaho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new a5cf57789 Normalize datafusion configuration names (#4596)
a5cf57789 is described below

commit a5cf57789a73646b92ecefc1124cd38215b91ee7
Author: yahoNanJing <90...@users.noreply.github.com>
AuthorDate: Wed Dec 14 09:45:42 2022 +0800

    Normalize datafusion configuration names (#4596)
    
    Co-authored-by: yangzhong <ya...@ebay.com>
---
 datafusion/core/src/config.rs                      | 88 +++++++++++++---------
 .../test_files/information_schema.slt              |  2 +-
 docs/source/user-guide/configs.md                  |  2 +-
 3 files changed, 54 insertions(+), 38 deletions(-)

diff --git a/datafusion/core/src/config.rs b/datafusion/core/src/config.rs
index 034dd29fd..091721554 100644
--- a/datafusion/core/src/config.rs
+++ b/datafusion/core/src/config.rs
@@ -27,37 +27,25 @@ use std::env;
 use std::fmt::{Debug, Formatter};
 use std::sync::Arc;
 
-/// Configuration option "datafusion.execution.target_partitions"
-pub const OPT_TARGET_PARTITIONS: &str = "datafusion.execution.target_partitions";
-
+/*-************************************
+*  Catalog related
+**************************************/
 /// Configuration option "datafusion.catalog.create_default_catalog_and_schema"
 pub const OPT_CREATE_DEFAULT_CATALOG_AND_SCHEMA: &str =
     "datafusion.catalog.create_default_catalog_and_schema";
+
 /// Configuration option "datafusion.catalog.information_schema"
 pub const OPT_INFORMATION_SCHEMA: &str = "datafusion.catalog.information_schema";
 
-/// Configuration option "datafusion.optimizer.repartition_joins"
-pub const OPT_REPARTITION_JOINS: &str = "datafusion.optimizer.repartition_joins";
-
-/// Configuration option "datafusion.optimizer.repartition_aggregations"
-pub const OPT_REPARTITION_AGGREGATIONS: &str =
-    "datafusion.optimizer.repartition_aggregations";
-
-/// Configuration option "datafusion.optimizer.repartition_windows"
-pub const OPT_REPARTITION_WINDOWS: &str = "datafusion.optimizer.repartition_windows";
-
-/// Configuration option "datafusion.execuction_collect_statistics"
-pub const OPT_COLLECT_STATISTICS: &str = "datafusion.execuction_collect_statistics";
-
-/// Configuration option "datafusion.optimizer.filter_null_join_keys"
-pub const OPT_FILTER_NULL_JOIN_KEYS: &str = "datafusion.optimizer.filter_null_join_keys";
-
-/// Configuration option "datafusion.explain.logical_plan_only"
-pub const OPT_EXPLAIN_LOGICAL_PLAN_ONLY: &str = "datafusion.explain.logical_plan_only";
+/// Location scanned to load tables for `default` schema
+pub const OPT_CATALOG_LOCATION: &str = "datafusion.catalog.location";
 
-/// Configuration option "datafusion.explain.physical_plan_only"
-pub const OPT_EXPLAIN_PHYSICAL_PLAN_ONLY: &str = "datafusion.explain.physical_plan_only";
+/// Type of `TableProvider` to use when loading `default` schema
+pub const OPT_CATALOG_TYPE: &str = "datafusion.catalog.type";
 
+/*-************************************
+*  Execution related
+**************************************/
 /// Configuration option "datafusion.execution.batch_size"
 pub const OPT_BATCH_SIZE: &str = "datafusion.execution.batch_size";
 
@@ -68,9 +56,25 @@ pub const OPT_COALESCE_BATCHES: &str = "datafusion.execution.coalesce_batches";
 pub const OPT_COALESCE_TARGET_BATCH_SIZE: &str =
     "datafusion.execution.coalesce_target_batch_size";
 
+/// Configuration option "datafusion.execution.collect_statistics"
+pub const OPT_COLLECT_STATISTICS: &str = "datafusion.execution.collect_statistics";
+
+/// Configuration option "datafusion.execution.target_partitions"
+pub const OPT_TARGET_PARTITIONS: &str = "datafusion.execution.target_partitions";
+
 /// Configuration option "datafusion.execution.time_zone"
 pub const OPT_TIME_ZONE: &str = "datafusion.execution.time_zone";
 
+/*-************************************
+*  Execution parquet related
+**************************************/
+/// Configuration option "datafusion.execution.parquet.enable_page_index"
+pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
+    "datafusion.execution.parquet.enable_page_index";
+
+/// Configuration option "datafusion.execution.parquet.pruning"
+pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";
+
 /// Configuration option "datafusion.execution.parquet.pushdown_filters"
 pub const OPT_PARQUET_PUSHDOWN_FILTERS: &str =
     "datafusion.execution.parquet.pushdown_filters";
@@ -79,13 +83,6 @@ pub const OPT_PARQUET_PUSHDOWN_FILTERS: &str =
 pub const OPT_PARQUET_REORDER_FILTERS: &str =
     "datafusion.execution.parquet.reorder_filters";
 
-/// Configuration option "datafusion.execution.parquet.enable_page_index"
-pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
-    "datafusion.execution.parquet.enable_page_index";
-
-/// Configuration option "datafusion.execution.parquet.pruning"
-pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";
-
 /// Configuration option "datafusion.execution.parquet.skip_metadata"
 pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_metadata";
 
@@ -93,6 +90,31 @@ pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_m
 pub const OPT_PARQUET_METADATA_SIZE_HINT: &str =
     "datafusion.execution.parquet.metadata_size_hint";
 
+/*-************************************
+*  Explain related
+**************************************/
+/// Configuration option "datafusion.explain.logical_plan_only"
+pub const OPT_EXPLAIN_LOGICAL_PLAN_ONLY: &str = "datafusion.explain.logical_plan_only";
+
+/// Configuration option "datafusion.explain.physical_plan_only"
+pub const OPT_EXPLAIN_PHYSICAL_PLAN_ONLY: &str = "datafusion.explain.physical_plan_only";
+
+/*-************************************
+*  Optimizer related
+**************************************/
+/// Configuration option "datafusion.optimizer.filter_null_join_keys"
+pub const OPT_FILTER_NULL_JOIN_KEYS: &str = "datafusion.optimizer.filter_null_join_keys";
+
+/// Configuration option "datafusion.optimizer.repartition_aggregations"
+pub const OPT_REPARTITION_AGGREGATIONS: &str =
+    "datafusion.optimizer.repartition_aggregations";
+
+/// Configuration option "datafusion.optimizer.repartition_joins"
+pub const OPT_REPARTITION_JOINS: &str = "datafusion.optimizer.repartition_joins";
+
+/// Configuration option "datafusion.optimizer.repartition_windows"
+pub const OPT_REPARTITION_WINDOWS: &str = "datafusion.optimizer.repartition_windows";
+
 /// Configuration option "datafusion.optimizer.skip_failed_rules"
 pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
     "datafusion.optimizer.skip_failed_rules";
@@ -100,12 +122,6 @@ pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
 /// Configuration option "datafusion.optimizer.max_passes"
 pub const OPT_OPTIMIZER_MAX_PASSES: &str = "datafusion.optimizer.max_passes";
 
-/// Location scanned to load tables for `default` schema
-pub const OPT_CATALOG_LOCATION: &str = "datafusion.catalog.location";
-
-/// Type of `TableProvider` to use when loading `default` schema
-pub const OPT_CATALOG_TYPE: &str = "datafusion.catalog.type";
-
 /// Configuration option "datafusion.optimizer.top_down_join_key_reordering"
 pub const OPT_TOP_DOWN_JOIN_KEY_REORDERING: &str =
     "datafusion.optimizer.top_down_join_key_reordering";
diff --git a/datafusion/core/tests/sqllogictests/test_files/information_schema.slt b/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
index 0599649da..e90fc2c12 100644
--- a/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/information_schema.slt
@@ -29,10 +29,10 @@ datafusion.catalog.create_default_catalog_and_schema true
 datafusion.catalog.information_schema true
 datafusion.catalog.location NULL
 datafusion.catalog.type NULL
-datafusion.execuction_collect_statistics false
 datafusion.execution.batch_size 8192
 datafusion.execution.coalesce_batches true
 datafusion.execution.coalesce_target_batch_size 4096
+datafusion.execution.collect_statistics false
 datafusion.execution.parquet.enable_page_index false
 datafusion.execution.parquet.metadata_size_hint NULL
 datafusion.execution.parquet.pruning true
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index e900d9c0b..81b1ef20a 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -41,10 +41,10 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 | datafusion.catalog.information_schema                     | Boolean | false   | Should DataFusion provide access to `information_schema` virtual tables for displaying schema information                                                                                                                                                                                                                                                     |
 | datafusion.catalog.location                               | Utf8    | NULL    | Location scanned to load tables for `default` schema, defaults to None                                                                                                                                                                                                                                                                                        |
 | datafusion.catalog.type                                   | Utf8    | NULL    | Type of `TableProvider` to use when loading `default` schema. Defaults to None                                                                                                                                                                                                                                                                                |
-| datafusion.execuction_collect_statistics                  | Boolean | false   | Should DataFusion repartition data using the partitions keys to execute window functions in parallel using the provided `target_partitions` level                                                                                                                                                                                                             |
| datafusion.execution.batch_size                           | UInt64  | 8192    | Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption.                                                                                                                                                                          |
 | datafusion.execution.coalesce_batches                     | Boolean | true    | When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting 'datafusion.execution.coalesce_target_batch_size'. |
 | datafusion.execution.coalesce_target_batch_size           | UInt64  | 4096    | Target batch size when coalescing batches. Uses in conjunction with the configuration setting 'datafusion.execution.coalesce_batches'.                                                                                                                                                                                                                        |
+| datafusion.execution.collect_statistics                   | Boolean | false   | Should DataFusion repartition data using the partitions keys to execute window functions in parallel using the provided `target_partitions` level                                                                                                                                                                                                             |
 | datafusion.execution.parquet.enable_page_index            | Boolean | false   | If true, uses parquet data page level metadata (Page Index) statistics to reduce the number of rows decoded.                                                                                                                                                                                                                                                  |
| datafusion.execution.parquet.metadata_size_hint           | UInt64  | NULL    | If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer.                                                                                      |
 | datafusion.execution.parquet.pruning                      | Boolean | true    | If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file.                                                                                                                                                                                              |