You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/11/14 06:46:17 UTC
[arrow-rs] branch test-thrift-017 updated: fix doc err
This is an automated email from the ASF dual-hosted git repository.
jiayuliu pushed a commit to branch test-thrift-017
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/test-thrift-017 by this push:
new 5ecc0d0c8 fix doc err
5ecc0d0c8 is described below
commit 5ecc0d0c87f283c46fb54c935244e6a57dce434d
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Mon Nov 14 14:27:18 2022 +0800
fix doc err
---
parquet/src/format.rs | 100 +++++++++++++++++++++++++-------------------------
1 file changed, 50 insertions(+), 50 deletions(-)
diff --git a/parquet/src/format.rs b/parquet/src/format.rs
index 3d38dd531..2e57fa4f3 100644
--- a/parquet/src/format.rs
+++ b/parquet/src/format.rs
@@ -99,7 +99,7 @@ impl From<&Type> for i32 {
/// DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet.
/// ConvertedType is superseded by LogicalType. This enum should not be extended.
-///
+///
/// See LogicalTypes.md for conversion between ConvertedType and LogicalType.
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ConvertedType(pub i32);
@@ -117,12 +117,12 @@ impl ConvertedType {
/// an enum is converted into a binary field
pub const ENUM: ConvertedType = ConvertedType(4);
/// A decimal value.
- ///
+ ///
/// This may be used to annotate binary or fixed primitive types. The
/// underlying byte array stores the unscaled value encoded as two's
/// complement using big-endian byte order (the most significant byte is the
/// zeroth element). The value of the decimal is the value * 10^{-scale}.
- ///
+ ///
/// This must be accompanied by a (maximum) precision and a scale in the
/// SchemaElement. The precision specifies the number of digits in the decimal
/// and the scale stores the location of the decimal point. For example 1.23
@@ -130,62 +130,62 @@ impl ConvertedType {
/// 2 digits over).
pub const DECIMAL: ConvertedType = ConvertedType(5);
/// A Date
- ///
+ ///
/// Stored as days since Unix epoch, encoded as the INT32 physical type.
- ///
+ ///
pub const DATE: ConvertedType = ConvertedType(6);
/// A time
- ///
+ ///
/// The total number of milliseconds since midnight. The value is stored
/// as an INT32 physical type.
pub const TIME_MILLIS: ConvertedType = ConvertedType(7);
/// A time.
- ///
+ ///
/// The total number of microseconds since midnight. The value is stored as
/// an INT64 physical type.
pub const TIME_MICROS: ConvertedType = ConvertedType(8);
/// A date/time combination
- ///
+ ///
/// Date and time recorded as milliseconds since the Unix epoch. Recorded as
/// a physical type of INT64.
pub const TIMESTAMP_MILLIS: ConvertedType = ConvertedType(9);
/// A date/time combination
- ///
+ ///
/// Date and time recorded as microseconds since the Unix epoch. The value is
/// stored as an INT64 physical type.
pub const TIMESTAMP_MICROS: ConvertedType = ConvertedType(10);
/// An unsigned integer value.
- ///
+ ///
/// The number describes the maximum number of meaningful data bits in
/// the stored value. 8, 16 and 32 bit values are stored using the
/// INT32 physical type. 64 bit values are stored using the INT64
/// physical type.
- ///
+ ///
pub const UINT_8: ConvertedType = ConvertedType(11);
pub const UINT_16: ConvertedType = ConvertedType(12);
pub const UINT_32: ConvertedType = ConvertedType(13);
pub const UINT_64: ConvertedType = ConvertedType(14);
/// A signed integer value.
- ///
+ ///
/// The number describes the maximum number of meaningful data bits in
/// the stored value. 8, 16 and 32 bit values are stored using the
/// INT32 physical type. 64 bit values are stored using the INT64
/// physical type.
- ///
+ ///
pub const INT_8: ConvertedType = ConvertedType(15);
pub const INT_16: ConvertedType = ConvertedType(16);
pub const INT_32: ConvertedType = ConvertedType(17);
pub const INT_64: ConvertedType = ConvertedType(18);
/// An embedded JSON document
- ///
+ ///
/// A JSON document embedded within a single UTF8 column.
pub const JSON: ConvertedType = ConvertedType(19);
/// An embedded BSON document
- ///
+ ///
/// A BSON document embedded within a single BINARY column.
pub const BSON: ConvertedType = ConvertedType(20);
/// An interval of time
- ///
+ ///
/// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12
/// This data is composed of three separate little endian unsigned
/// integers. Each stores a component of a duration of time. The first
@@ -443,11 +443,11 @@ impl From<&Encoding> for i32 {
}
/// Supported compression algorithms.
-///
+///
/// Codecs added in format version X.Y can be read by readers based on X.Y and later.
/// Codec support may vary between readers based on the format version and
/// libraries available at runtime.
-///
+///
/// See Compression.md for a detailed specification of these algorithms.
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct CompressionCodec(pub i32);
@@ -640,14 +640,14 @@ impl From<&BoundaryOrder> for i32 {
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Statistics {
/// DEPRECATED: min and max value of the column. Use min_value and max_value.
- ///
+ ///
/// Values are encoded using PLAIN encoding, except that variable-length byte
/// arrays do not include a length prefix.
- ///
+ ///
/// These fields encode min and max values determined by signed comparison
/// only. New files should use the correct order for a column's logical type
/// and store the values in the min_value and max_value fields.
- ///
+ ///
/// To support older readers, these may be set when the column order is
/// signed.
pub max: Option<Vec<u8>>,
@@ -657,7 +657,7 @@ pub struct Statistics {
/// count of distinct values occurring
pub distinct_count: Option<i64>,
/// Min and max values for the column, determined by its ColumnOrder.
- ///
+ ///
/// Values are encoded using PLAIN encoding, except that variable-length byte
/// arrays do not include a length prefix.
pub max_value: Option<Vec<u8>>,
@@ -1079,7 +1079,7 @@ impl Default for DateType {
//
/// Logical type to annotate a column that is always null.
-///
+///
/// Sometimes when discovering the schema of existing data, values are always
/// null and the physical type can't be determined. This annotation signals
/// the case where the physical type was guessed from all null values.
@@ -1132,10 +1132,10 @@ impl Default for NullType {
//
/// Decimal logical type annotation
-///
+///
/// To maintain forward-compatibility in v1, implementations using this logical
/// type must also set scale and precision on the annotated SchemaElement.
-///
+///
/// Allowed for physical types: INT32, INT64, FIXED, and BINARY
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct DecimalType {
@@ -1450,7 +1450,7 @@ impl TSerializable for TimeUnit {
//
/// Timestamp logical type annotation
-///
+///
/// Allowed for physical types: INT64
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct TimestampType {
@@ -1521,7 +1521,7 @@ impl TSerializable for TimestampType {
//
/// Time logical type annotation
-///
+///
/// Allowed for physical types: INT32 (millis), INT64 (micros, nanos)
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct TimeType {
@@ -1592,9 +1592,9 @@ impl TSerializable for TimeType {
//
/// Integer logical type annotation
-///
+///
/// bitWidth must be 8, 16, 32, or 64.
-///
+///
/// Allowed for physical types: INT32, INT64
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct IntType {
@@ -1665,7 +1665,7 @@ impl TSerializable for IntType {
//
/// Embedded JSON logical type annotation
-///
+///
/// Allowed for physical types: BINARY
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct JsonType {
@@ -1716,7 +1716,7 @@ impl Default for JsonType {
//
/// Embedded BSON logical type annotation
-///
+///
/// Allowed for physical types: BINARY
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct BsonType {
@@ -2020,12 +2020,12 @@ pub struct SchemaElement {
pub num_children: Option<i32>,
/// DEPRECATED: When the schema is the result of a conversion from another model.
/// Used to record the original type to help with cross conversion.
- ///
+ ///
/// This is superseded by logicalType.
pub converted_type: Option<ConvertedType>,
/// DEPRECATED: Used when this column contains decimal data.
/// See the DECIMAL converted type for more details.
- ///
+ ///
/// This is superseded by using the DecimalType annotation in logicalType.
pub scale: Option<i32>,
pub precision: Option<i32>,
@@ -2033,7 +2033,7 @@ pub struct SchemaElement {
/// original field id in the parquet schema
pub field_id: Option<i32>,
/// The logical type of this SchemaElement
- ///
+ ///
/// LogicalType replaces ConvertedType, but ConvertedType is still required
/// for some logical types to ensure forward-compatibility in format v1.
pub logical_type: Option<LogicalType>,
@@ -2444,7 +2444,7 @@ impl TSerializable for DictionaryPageHeader {
/// New page format allowing reading levels without decompressing the data
/// Repetition and definition levels are uncompressed
/// The remaining section containing the data is compressed if is_compressed is true
-///
+///
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct DataPageHeaderV2 {
/// Number of values, including NULLs, in this data page. *
@@ -2724,7 +2724,7 @@ impl TSerializable for BloomFilterAlgorithm {
/// Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash
/// algorithm. It uses 64 bits version of xxHash.
-///
+///
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct XxHash {
}
@@ -2847,7 +2847,7 @@ impl TSerializable for BloomFilterHash {
//
/// The compression used in the Bloom filter.
-///
+///
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Uncompressed {
}
@@ -2971,7 +2971,7 @@ impl TSerializable for BloomFilterCompression {
/// Bloom filter header is stored at beginning of Bloom filter data of each column
/// and followed by its bitset.
-///
+///
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct BloomFilterHeader {
/// The size of bitset in bytes *
@@ -3101,7 +3101,7 @@ pub struct PageHeader {
/// encryption itself is performed after page compression (if compressed)
/// If enabled, this allows for disabling checksumming in HDFS if only a few
/// pages need to be read.
- ///
+ ///
pub crc: Option<i32>,
pub data_page_header: Option<DataPageHeader>,
pub index_page_header: Option<IndexPageHeader>,
@@ -3977,14 +3977,14 @@ impl TSerializable for ColumnCryptoMetaData {
pub struct ColumnChunk {
/// File where column data is stored. If not set, assumed to be same file as
/// metadata. This path is relative to the current file.
- ///
+ ///
pub file_path: Option<String>,
/// Byte offset in file_path to the ColumnMetaData *
pub file_offset: i64,
/// Column metadata for this chunk. This is the same content as what is at
/// file_path/file_offset. Having it here has it replicated in the file
/// metadata.
- ///
+ ///
pub meta_data: Option<ColumnMetaData>,
/// File offset of ColumnChunk's OffsetIndex *
pub offset_index_offset: Option<i64>,
@@ -4151,7 +4151,7 @@ impl TSerializable for ColumnChunk {
pub struct RowGroup {
/// Metadata for each column chunk in this row group.
/// This list must have the same order as the SchemaElement list in FileMetaData.
- ///
+ ///
pub columns: Vec<ColumnChunk>,
/// Total byte size of all the uncompressed column data in this row group *
pub total_byte_size: i64,
@@ -4521,7 +4521,7 @@ impl TSerializable for PageLocation {
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct OffsetIndex {
/// PageLocations, ordered by increasing PageLocation.offset. It is required
- /// that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
+ /// that page_locations\[i\].first_row_index < page_locations\[i+1\].first_row_index.
pub page_locations: Vec<PageLocation>,
}
@@ -4587,27 +4587,27 @@ impl TSerializable for OffsetIndex {
//
/// Description for ColumnIndex.
-/// Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]
+/// Each <array-field>\[i\] refers to the page at OffsetIndex.page_locations\[i\]
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ColumnIndex {
/// A list of Boolean values to determine the validity of the corresponding
/// min and max values. If true, a page contains only null values, and writers
/// have to set the corresponding entries in min_values and max_values to
- /// byte[0], so that all lists have the same length. If false, the
+ /// byte\[0\], so that all lists have the same length. If false, the
/// corresponding entries in min_values and max_values must be valid.
pub null_pages: Vec<bool>,
/// Two lists containing lower and upper bounds for the values of each page.
/// These may be the actual minimum and maximum values found on a page, but
/// can also be (more compact) values that do not exist on a page. For
/// example, instead of storing "Blart Versenwald III", a writer may set
- /// min_values[i]="B", max_values[i]="C". Such more compact values must still
+ /// min_values\[i\]="B", max_values\[i\]="C". Such more compact values must still
/// be valid values within the column's logical type. Readers must make sure
/// that list entries are populated before using them by inspecting null_pages.
pub min_values: Vec<Vec<u8>>,
pub max_values: Vec<Vec<u8>>,
/// Stores whether both min_values and max_values are ordered and if so, in
/// which direction. This allows readers to perform binary searches in both
- /// lists. Readers cannot assume that max_values[i] <= min_values[i+1], even
+ /// lists. Readers cannot assume that max_values\[i\] <= min_values\[i+1\], even
/// if the lists are ordered.
pub boundary_order: BoundaryOrder,
/// A list containing the number of null values for each page *
@@ -5051,17 +5051,17 @@ pub struct FileMetaData {
/// String for application that wrote this file. This should be in the format
/// <Application> version <App Version> (build <App Build Hash>).
/// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
- ///
+ ///
pub created_by: Option<String>,
/// Sort order used for the min_value and max_value fields of each column in
/// this file. Sort orders are listed in the order matching the columns in the
/// schema. The indexes are not necessarily the same though, because only leaf
/// nodes of the schema are represented in the list of sort orders.
- ///
+ ///
/// Without column_orders, the meaning of the min_value and max_value fields is
/// undefined. To ensure well-defined behaviour, if min_value and max_value are
/// written to a Parquet file, column_orders must be written as well.
- ///
+ ///
/// The obsolete min and max fields are always sorted by signed comparison
/// regardless of column_orders.
pub column_orders: Option<Vec<ColumnOrder>>,