You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/09 17:18:06 UTC

[arrow-rs] branch master updated: Improve parquet WriterProperties and ReaderProperties docs (#4392)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 7bedb0a3a Improve parquet WriterProperties and ReaderProperties docs (#4392)
7bedb0a3a is described below

commit 7bedb0a3ac642395daaa5318cb71f8f5c994657b
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Fri Jun 9 13:18:00 2023 -0400

    Improve parquet WriterProperties and ReaderProperties docs (#4392)
---
 parquet/src/file/properties.rs | 139 ++++++++++++++++++++++-------------------
 1 file changed, 74 insertions(+), 65 deletions(-)

diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs
index 66690463a..9724fd7f4 100644
--- a/parquet/src/file/properties.rs
+++ b/parquet/src/file/properties.rs
@@ -15,55 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! [`WriterProperties`]
-//!
-//! # Usage
-//!
-//! ```rust
-//! use parquet::{
-//!     basic::{Compression, Encoding},
-//!     file::properties::*,
-//!     schema::types::ColumnPath,
-//! };
-//!
-//! // Create properties with default configuration.
-//! let props = WriterProperties::default();
-//!
-//! // Use properties builder to set certain options and assemble the configuration.
-//! let props = WriterProperties::builder()
-//!     .set_writer_version(WriterVersion::PARQUET_1_0)
-//!     .set_encoding(Encoding::PLAIN)
-//!     .set_column_encoding(ColumnPath::from("col1"), Encoding::DELTA_BINARY_PACKED)
-//!     .set_compression(Compression::SNAPPY)
-//!     .build();
-//!
-//! assert_eq!(props.writer_version(), WriterVersion::PARQUET_1_0);
-//! assert_eq!(
-//!     props.encoding(&ColumnPath::from("col1")),
-//!     Some(Encoding::DELTA_BINARY_PACKED)
-//! );
-//! assert_eq!(
-//!     props.encoding(&ColumnPath::from("col2")),
-//!     Some(Encoding::PLAIN)
-//! );
-//! ```
-//!
-//! Reader properties.
-//!
-//! # Usage
-//!
-//! ```rust
-//! use parquet::file::properties::ReaderProperties;
-//!
-//! // Create properties with default configuration.
-//! let props = ReaderProperties::builder().build();
-//!
-//! // Use properties builder to set certain options and assemble the configuration.
-//! let props = ReaderProperties::builder()
-//!     .set_backward_compatible_lz4(false)
-//!     .build();
-//! ```
-
+//! Configuration via [`WriterProperties`] and [`ReaderProperties`]
 use std::{collections::HashMap, sync::Arc};
 
 use crate::basic::{Compression, Encoding};
@@ -72,20 +24,30 @@ use crate::file::metadata::KeyValue;
 use crate::format::SortingColumn;
 use crate::schema::types::ColumnPath;
 
-const DEFAULT_PAGE_SIZE: usize = 1024 * 1024;
-const DEFAULT_WRITE_BATCH_SIZE: usize = 1024;
-const DEFAULT_WRITER_VERSION: WriterVersion = WriterVersion::PARQUET_1_0;
-const DEFAULT_COMPRESSION: Compression = Compression::UNCOMPRESSED;
-const DEFAULT_DICTIONARY_ENABLED: bool = true;
-const DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT: usize = DEFAULT_PAGE_SIZE;
-const DEFAULT_STATISTICS_ENABLED: EnabledStatistics = EnabledStatistics::Page;
-const DEFAULT_MAX_STATISTICS_SIZE: usize = 4096;
-const DEFAULT_MAX_ROW_GROUP_SIZE: usize = 1024 * 1024;
-const DEFAULT_CREATED_BY: &str =
+/// Default value for [`WriterProperties::data_page_size_limit`]
+pub const DEFAULT_PAGE_SIZE: usize = 1024 * 1024;
+/// Default value for [`WriterProperties::write_batch_size`]
+pub const DEFAULT_WRITE_BATCH_SIZE: usize = 1024;
+/// Default value for [`WriterProperties::writer_version`]
+pub const DEFAULT_WRITER_VERSION: WriterVersion = WriterVersion::PARQUET_1_0;
+/// Default value for [`WriterProperties::compression`]
+pub const DEFAULT_COMPRESSION: Compression = Compression::UNCOMPRESSED;
+/// Default value for [`WriterProperties::dictionary_enabled`]
+pub const DEFAULT_DICTIONARY_ENABLED: bool = true;
+/// Default value for [`WriterProperties::dictionary_page_size_limit`]
+pub const DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT: usize = DEFAULT_PAGE_SIZE;
+/// Default value for [`WriterProperties::statistics_enabled`]
+pub const DEFAULT_STATISTICS_ENABLED: EnabledStatistics = EnabledStatistics::Page;
+/// Default value for [`WriterProperties::max_statistics_size`]
+pub const DEFAULT_MAX_STATISTICS_SIZE: usize = 4096;
+/// Default value for [`WriterProperties::max_row_group_size`]
+pub const DEFAULT_MAX_ROW_GROUP_SIZE: usize = 1024 * 1024;
+/// Default value for [`WriterProperties::created_by`]
+pub const DEFAULT_CREATED_BY: &str =
     concat!("parquet-rs version ", env!("CARGO_PKG_VERSION"));
-/// default value for the false positive probability used in a bloom filter.
+/// Default value for [`BloomFilterProperties::fpp`]
 pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05;
-/// default value for the expected number of distinct values used in a bloom filter.
+/// Default value for [`BloomFilterProperties::ndv`]
 pub const DEFAULT_BLOOM_FILTER_NDV: u64 = 1_000_000_u64;
 
 /// Parquet writer version.
@@ -111,10 +73,41 @@ impl WriterVersion {
 /// Reference counted writer properties.
 pub type WriterPropertiesPtr = Arc<WriterProperties>;
 
-/// Writer properties.
+/// Configuration settings for writing parquet files.
 ///
 /// All properties except the key-value metadata are immutable,
 /// use [`WriterPropertiesBuilder`] to assemble these properties.
+///
+/// # Example
+///
+/// ```rust
+/// use parquet::{
+///     basic::{Compression, Encoding},
+///     file::properties::*,
+///     schema::types::ColumnPath,
+/// };
+///
+/// // Create properties with default configuration.
+/// let props = WriterProperties::default();
+///
+/// // Use properties builder to set certain options and assemble the configuration.
+/// let props = WriterProperties::builder()
+///     .set_writer_version(WriterVersion::PARQUET_1_0)
+///     .set_encoding(Encoding::PLAIN)
+///     .set_column_encoding(ColumnPath::from("col1"), Encoding::DELTA_BINARY_PACKED)
+///     .set_compression(Compression::SNAPPY)
+///     .build();
+///
+/// assert_eq!(props.writer_version(), WriterVersion::PARQUET_1_0);
+/// assert_eq!(
+///     props.encoding(&ColumnPath::from("col1")),
+///     Some(Encoding::DELTA_BINARY_PACKED)
+/// );
+/// assert_eq!(
+///     props.encoding(&ColumnPath::from("col2")),
+///     Some(Encoding::PLAIN)
+/// );
+/// ```
 #[derive(Debug, Clone)]
 pub struct WriterProperties {
     data_page_size_limit: usize,
@@ -307,7 +300,8 @@ impl WriterProperties {
     }
 }
 
-/// Writer properties builder.
+/// Builder for parquet file writer configuration. See example on
+/// [`WriterProperties`].
 pub struct WriterPropertiesBuilder {
     data_page_size_limit: usize,
     dictionary_page_size_limit: usize,
@@ -809,10 +803,24 @@ pub type ReaderPropertiesPtr = Arc<ReaderProperties>;
 
 const DEFAULT_READ_BLOOM_FILTER: bool = false;
 
-/// Reader properties.
+/// Configuration settings for reading parquet files.
 ///
 /// All properties are immutable and `Send` + `Sync`.
 /// Use [`ReaderPropertiesBuilder`] to assemble these properties.
+///
+/// # Example
+///
+/// ```rust
+/// use parquet::file::properties::ReaderProperties;
+///
+/// // Create properties with default configuration.
+/// let props = ReaderProperties::builder().build();
+///
+/// // Use properties builder to set certain options and assemble the configuration.
+/// let props = ReaderProperties::builder()
+///     .set_backward_compatible_lz4(false)
+///     .build();
+/// ```
 pub struct ReaderProperties {
     codec_options: CodecOptions,
     read_bloom_filter: bool,
@@ -835,7 +843,8 @@ impl ReaderProperties {
     }
 }
 
-/// Reader properties builder.
+/// Builder for parquet file reader configuration. See example on
+/// [`ReaderProperties`].
 pub struct ReaderPropertiesBuilder {
     codec_options_builder: CodecOptionsBuilder,
     read_bloom_filter: Option<bool>,