You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/11/14 06:46:17 UTC

[arrow-rs] branch test-thrift-017 updated: fix doc err

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch test-thrift-017
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/test-thrift-017 by this push:
     new 5ecc0d0c8 fix doc err
5ecc0d0c8 is described below

commit 5ecc0d0c87f283c46fb54c935244e6a57dce434d
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Mon Nov 14 14:27:18 2022 +0800

    fix doc err
---
 parquet/src/format.rs | 100 +++++++++++++++++++++++++-------------------------
 1 file changed, 50 insertions(+), 50 deletions(-)

diff --git a/parquet/src/format.rs b/parquet/src/format.rs
index 3d38dd531..2e57fa4f3 100644
--- a/parquet/src/format.rs
+++ b/parquet/src/format.rs
@@ -99,7 +99,7 @@ impl From<&Type> for i32 {
 
 /// DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet.
 /// ConvertedType is superseded by LogicalType.  This enum should not be extended.
-/// 
+///
 /// See LogicalTypes.md for conversion between ConvertedType and LogicalType.
 #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct ConvertedType(pub i32);
@@ -117,12 +117,12 @@ impl ConvertedType {
   /// an enum is converted into a binary field
   pub const ENUM: ConvertedType = ConvertedType(4);
   /// A decimal value.
-  /// 
+  ///
   /// This may be used to annotate binary or fixed primitive types. The
   /// underlying byte array stores the unscaled value encoded as two's
   /// complement using big-endian byte order (the most significant byte is the
   /// zeroth element). The value of the decimal is the value * 10^{-scale}.
-  /// 
+  ///
   /// This must be accompanied by a (maximum) precision and a scale in the
   /// SchemaElement. The precision specifies the number of digits in the decimal
   /// and the scale stores the location of the decimal point. For example 1.23
@@ -130,62 +130,62 @@ impl ConvertedType {
   /// 2 digits over).
   pub const DECIMAL: ConvertedType = ConvertedType(5);
   /// A Date
-  /// 
+  ///
   /// Stored as days since Unix epoch, encoded as the INT32 physical type.
-  /// 
+  ///
   pub const DATE: ConvertedType = ConvertedType(6);
   /// A time
-  /// 
+  ///
   /// The total number of milliseconds since midnight.  The value is stored
   /// as an INT32 physical type.
   pub const TIME_MILLIS: ConvertedType = ConvertedType(7);
   /// A time.
-  /// 
+  ///
   /// The total number of microseconds since midnight.  The value is stored as
   /// an INT64 physical type.
   pub const TIME_MICROS: ConvertedType = ConvertedType(8);
   /// A date/time combination
-  /// 
+  ///
   /// Date and time recorded as milliseconds since the Unix epoch.  Recorded as
   /// a physical type of INT64.
   pub const TIMESTAMP_MILLIS: ConvertedType = ConvertedType(9);
   /// A date/time combination
-  /// 
+  ///
   /// Date and time recorded as microseconds since the Unix epoch.  The value is
   /// stored as an INT64 physical type.
   pub const TIMESTAMP_MICROS: ConvertedType = ConvertedType(10);
   /// An unsigned integer value.
-  /// 
+  ///
   /// The number describes the maximum number of meaningful data bits in
   /// the stored value. 8, 16 and 32 bit values are stored using the
   /// INT32 physical type.  64 bit values are stored using the INT64
   /// physical type.
-  /// 
+  ///
   pub const UINT_8: ConvertedType = ConvertedType(11);
   pub const UINT_16: ConvertedType = ConvertedType(12);
   pub const UINT_32: ConvertedType = ConvertedType(13);
   pub const UINT_64: ConvertedType = ConvertedType(14);
   /// A signed integer value.
-  /// 
+  ///
   /// The number describes the maximum number of meaningful data bits in
   /// the stored value. 8, 16 and 32 bit values are stored using the
   /// INT32 physical type.  64 bit values are stored using the INT64
   /// physical type.
-  /// 
+  ///
   pub const INT_8: ConvertedType = ConvertedType(15);
   pub const INT_16: ConvertedType = ConvertedType(16);
   pub const INT_32: ConvertedType = ConvertedType(17);
   pub const INT_64: ConvertedType = ConvertedType(18);
   /// An embedded JSON document
-  /// 
+  ///
   /// A JSON document embedded within a single UTF8 column.
   pub const JSON: ConvertedType = ConvertedType(19);
   /// An embedded BSON document
-  /// 
+  ///
   /// A BSON document embedded within a single BINARY column.
   pub const BSON: ConvertedType = ConvertedType(20);
   /// An interval of time
-  /// 
+  ///
   /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12
   /// This data is composed of three separate little endian unsigned
   /// integers.  Each stores a component of a duration of time.  The first
@@ -443,11 +443,11 @@ impl From<&Encoding> for i32 {
 }
 
 /// Supported compression algorithms.
-/// 
+///
 /// Codecs added in format version X.Y can be read by readers based on X.Y and later.
 /// Codec support may vary between readers based on the format version and
 /// libraries available at runtime.
-/// 
+///
 /// See Compression.md for a detailed specification of these algorithms.
 #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct CompressionCodec(pub i32);
@@ -640,14 +640,14 @@ impl From<&BoundaryOrder> for i32 {
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct Statistics {
   /// DEPRECATED: min and max value of the column. Use min_value and max_value.
-  /// 
+  ///
   /// Values are encoded using PLAIN encoding, except that variable-length byte
   /// arrays do not include a length prefix.
-  /// 
+  ///
   /// These fields encode min and max values determined by signed comparison
   /// only. New files should use the correct order for a column's logical type
   /// and store the values in the min_value and max_value fields.
-  /// 
+  ///
   /// To support older readers, these may be set when the column order is
   /// signed.
   pub max: Option<Vec<u8>>,
@@ -657,7 +657,7 @@ pub struct Statistics {
   /// count of distinct values occurring
   pub distinct_count: Option<i64>,
   /// Min and max values for the column, determined by its ColumnOrder.
-  /// 
+  ///
   /// Values are encoded using PLAIN encoding, except that variable-length byte
   /// arrays do not include a length prefix.
   pub max_value: Option<Vec<u8>>,
@@ -1079,7 +1079,7 @@ impl Default for DateType {
 //
 
 /// Logical type to annotate a column that is always null.
-/// 
+///
 /// Sometimes when discovering the schema of existing data, values are always
 /// null and the physical type can't be determined. This annotation signals
 /// the case where the physical type was guessed from all null values.
@@ -1132,10 +1132,10 @@ impl Default for NullType {
 //
 
 /// Decimal logical type annotation
-/// 
+///
 /// To maintain forward-compatibility in v1, implementations using this logical
 /// type must also set scale and precision on the annotated SchemaElement.
-/// 
+///
 /// Allowed for physical types: INT32, INT64, FIXED, and BINARY
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct DecimalType {
@@ -1450,7 +1450,7 @@ impl TSerializable for TimeUnit {
 //
 
 /// Timestamp logical type annotation
-/// 
+///
 /// Allowed for physical types: INT64
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct TimestampType {
@@ -1521,7 +1521,7 @@ impl TSerializable for TimestampType {
 //
 
 /// Time logical type annotation
-/// 
+///
 /// Allowed for physical types: INT32 (millis), INT64 (micros, nanos)
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct TimeType {
@@ -1592,9 +1592,9 @@ impl TSerializable for TimeType {
 //
 
 /// Integer logical type annotation
-/// 
+///
 /// bitWidth must be 8, 16, 32, or 64.
-/// 
+///
 /// Allowed for physical types: INT32, INT64
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct IntType {
@@ -1665,7 +1665,7 @@ impl TSerializable for IntType {
 //
 
 /// Embedded JSON logical type annotation
-/// 
+///
 /// Allowed for physical types: BINARY
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct JsonType {
@@ -1716,7 +1716,7 @@ impl Default for JsonType {
 //
 
 /// Embedded BSON logical type annotation
-/// 
+///
 /// Allowed for physical types: BINARY
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct BsonType {
@@ -2020,12 +2020,12 @@ pub struct SchemaElement {
   pub num_children: Option<i32>,
   /// DEPRECATED: When the schema is the result of a conversion from another model.
   /// Used to record the original type to help with cross conversion.
-  /// 
+  ///
   /// This is superseded by logicalType.
   pub converted_type: Option<ConvertedType>,
   /// DEPRECATED: Used when this column contains decimal data.
   /// See the DECIMAL converted type for more details.
-  /// 
+  ///
   /// This is superseded by using the DecimalType annotation in logicalType.
   pub scale: Option<i32>,
   pub precision: Option<i32>,
@@ -2033,7 +2033,7 @@ pub struct SchemaElement {
   /// original field id in the parquet schema
   pub field_id: Option<i32>,
   /// The logical type of this SchemaElement
-  /// 
+  ///
   /// LogicalType replaces ConvertedType, but ConvertedType is still required
   /// for some logical types to ensure forward-compatibility in format v1.
   pub logical_type: Option<LogicalType>,
@@ -2444,7 +2444,7 @@ impl TSerializable for DictionaryPageHeader {
 /// New page format allowing reading levels without decompressing the data
 /// Repetition and definition levels are uncompressed
 /// The remaining section containing the data is compressed if is_compressed is true
-/// 
+///
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct DataPageHeaderV2 {
   /// Number of values, including NULLs, in this data page. *
@@ -2724,7 +2724,7 @@ impl TSerializable for BloomFilterAlgorithm {
 
 /// Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash
 /// algorithm. It uses 64 bits version of xxHash.
-/// 
+///
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct XxHash {
 }
@@ -2847,7 +2847,7 @@ impl TSerializable for BloomFilterHash {
 //
 
 /// The compression used in the Bloom filter.
-/// 
+///
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct Uncompressed {
 }
@@ -2971,7 +2971,7 @@ impl TSerializable for BloomFilterCompression {
 
 /// Bloom filter header is stored at beginning of Bloom filter data of each column
 /// and followed by its bitset.
-/// 
+///
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct BloomFilterHeader {
   /// The size of bitset in bytes *
@@ -3101,7 +3101,7 @@ pub struct PageHeader {
   ///   encryption itself is performed after page compression (if compressed)
   /// If enabled, this allows for disabling checksumming in HDFS if only a few
   /// pages need to be read.
-  /// 
+  ///
   pub crc: Option<i32>,
   pub data_page_header: Option<DataPageHeader>,
   pub index_page_header: Option<IndexPageHeader>,
@@ -3977,14 +3977,14 @@ impl TSerializable for ColumnCryptoMetaData {
 pub struct ColumnChunk {
   /// File where column data is stored.  If not set, assumed to be same file as
   /// metadata.  This path is relative to the current file.
-  /// 
+  ///
   pub file_path: Option<String>,
   /// Byte offset in file_path to the ColumnMetaData *
   pub file_offset: i64,
   /// Column metadata for this chunk. This is the same content as what is at
   /// file_path/file_offset.  Having it here has it replicated in the file
   /// metadata.
-  /// 
+  ///
   pub meta_data: Option<ColumnMetaData>,
   /// File offset of ColumnChunk's OffsetIndex *
   pub offset_index_offset: Option<i64>,
@@ -4151,7 +4151,7 @@ impl TSerializable for ColumnChunk {
 pub struct RowGroup {
   /// Metadata for each column chunk in this row group.
   /// This list must have the same order as the SchemaElement list in FileMetaData.
-  /// 
+  ///
   pub columns: Vec<ColumnChunk>,
   /// Total byte size of all the uncompressed column data in this row group *
   pub total_byte_size: i64,
@@ -4521,7 +4521,7 @@ impl TSerializable for PageLocation {
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct OffsetIndex {
   /// PageLocations, ordered by increasing PageLocation.offset. It is required
-  /// that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
+  /// that page_locations\[i\].first_row_index < page_locations\[i+1\].first_row_index.
   pub page_locations: Vec<PageLocation>,
 }
 
@@ -4587,27 +4587,27 @@ impl TSerializable for OffsetIndex {
 //
 
 /// Description for ColumnIndex.
-/// Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]
+/// Each <array-field>\[i\] refers to the page at OffsetIndex.page_locations\[i\]
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct ColumnIndex {
   /// A list of Boolean values to determine the validity of the corresponding
   /// min and max values. If true, a page contains only null values, and writers
   /// have to set the corresponding entries in min_values and max_values to
-  /// byte[0], so that all lists have the same length. If false, the
+  /// byte\[0\], so that all lists have the same length. If false, the
   /// corresponding entries in min_values and max_values must be valid.
   pub null_pages: Vec<bool>,
   /// Two lists containing lower and upper bounds for the values of each page.
   /// These may be the actual minimum and maximum values found on a page, but
   /// can also be (more compact) values that do not exist on a page. For
   /// example, instead of storing "Blart Versenwald III", a writer may set
-  /// min_values[i]="B", max_values[i]="C". Such more compact values must still
+  /// min_values\[i\]="B", max_values\[i\]="C". Such more compact values must still
   /// be valid values within the column's logical type. Readers must make sure
   /// that list entries are populated before using them by inspecting null_pages.
   pub min_values: Vec<Vec<u8>>,
   pub max_values: Vec<Vec<u8>>,
   /// Stores whether both min_values and max_values are ordered and if so, in
   /// which direction. This allows readers to perform binary searches in both
-  /// lists. Readers cannot assume that max_values[i] <= min_values[i+1], even
+  /// lists. Readers cannot assume that max_values\[i\] <= min_values\[i+1\], even
   /// if the lists are ordered.
   pub boundary_order: BoundaryOrder,
   /// A list containing the number of null values for each page *
@@ -5051,17 +5051,17 @@ pub struct FileMetaData {
   /// String for application that wrote this file.  This should be in the format
   /// <Application> version <App Version> (build <App Build Hash>).
   /// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
-  /// 
+  ///
   pub created_by: Option<String>,
   /// Sort order used for the min_value and max_value fields of each column in
   /// this file. Sort orders are listed in the order matching the columns in the
   /// schema. The indexes are not necessarily the same though, because only leaf
   /// nodes of the schema are represented in the list of sort orders.
-  /// 
+  ///
   /// Without column_orders, the meaning of the min_value and max_value fields is
   /// undefined. To ensure well-defined behaviour, if min_value and max_value are
   /// written to a Parquet file, column_orders must be written as well.
-  /// 
+  ///
   /// The obsolete min and max fields are always sorted by signed comparison
   /// regardless of column_orders.
   pub column_orders: Option<Vec<ColumnOrder>>,