You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/24 19:21:17 UTC
[arrow-rs] branch master updated: Doc improvements (#3155)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2460c7b5d Doc improvements (#3155)
2460c7b5d is described below
commit 2460c7b5da2ba22c7fb0ef0df6ac84984e3aed12
Author: Vrishabh <ps...@gmail.com>
AuthorDate: Fri Nov 25 00:51:11 2022 +0530
Doc improvements (#3155)
* Improving arrow-json docs
* Improving arrow-array docs
* Fix tests
* Fix typos
* Incorporate review comments
* Improve doc for fixed_size_list_builder
* Fix doc comments
---
arrow-array/src/arithmetic.rs | 22 +++
arrow-array/src/array/boolean_array.rs | 2 +-
arrow-array/src/array/list_array.rs | 2 +
arrow-array/src/array/mod.rs | 1 +
arrow-array/src/array/primitive_array.rs | 30 +++-
arrow-array/src/builder/boolean_buffer_builder.rs | 13 ++
arrow-array/src/builder/buffer_builder.rs | 32 +++++
.../src/builder/fixed_size_binary_builder.rs | 16 +++
arrow-array/src/builder/fixed_size_list_builder.rs | 39 ++++++
arrow-array/src/builder/map_builder.rs | 47 +++++++
arrow-array/src/builder/mod.rs | 6 +
arrow-array/src/builder/primitive_builder.rs | 35 +++++
.../src/builder/primitive_dictionary_builder.rs | 1 +
.../src/builder/string_dictionary_builder.rs | 1 +
arrow-array/src/builder/struct_builder.rs | 2 +
arrow-array/src/iterator.rs | 5 +
arrow-array/src/lib.rs | 3 +
arrow-array/src/record_batch.rs | 1 +
arrow-array/src/types.rs | 154 +++++++++++++++++----
arrow-json/src/lib.rs | 4 +
arrow-json/src/reader.rs | 2 +
21 files changed, 387 insertions(+), 31 deletions(-)
diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs
index 566f3742e..dcb6a1be7 100644
--- a/arrow-array/src/arithmetic.rs
+++ b/arrow-array/src/arithmetic.rs
@@ -45,60 +45,82 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {
/// The multiplicative identity
const ONE: Self;
+ /// Checked addition operation
fn add_checked(self, rhs: Self) -> Result<Self, ArrowError>;
+ /// Wrapping addition operation
fn add_wrapping(self, rhs: Self) -> Self;
+ /// Checked subtraction operation
fn sub_checked(self, rhs: Self) -> Result<Self, ArrowError>;
+ /// Wrapping subtraction operation
fn sub_wrapping(self, rhs: Self) -> Self;
+ /// Checked multiplication operation
fn mul_checked(self, rhs: Self) -> Result<Self, ArrowError>;
+ /// Wrapping multiplication operation
fn mul_wrapping(self, rhs: Self) -> Self;
+ /// Checked division operation
fn div_checked(self, rhs: Self) -> Result<Self, ArrowError>;
+ /// Wrapping division operation
fn div_wrapping(self, rhs: Self) -> Self;
+ /// Checked remainder operation
fn mod_checked(self, rhs: Self) -> Result<Self, ArrowError>;
+ /// Wrapping remainder operation
fn mod_wrapping(self, rhs: Self) -> Self;
+ /// Checked negation operation
fn neg_checked(self) -> Result<Self, ArrowError>;
+ /// Wrapping negation operation
fn neg_wrapping(self) -> Self;
+ /// Checked exponentiation operation
fn pow_checked(self, exp: u32) -> Result<Self, ArrowError>;
+ /// Wrapping exponentiation operation
fn pow_wrapping(self, exp: u32) -> Self;
+ /// Returns true if zero else false
fn is_zero(self) -> bool;
+ /// Compare operation
fn compare(self, rhs: Self) -> Ordering;
+ /// Equality operation
fn is_eq(self, rhs: Self) -> bool;
+ /// Not equal operation
#[inline]
fn is_ne(self, rhs: Self) -> bool {
!self.is_eq(rhs)
}
+ /// Less than operation
#[inline]
fn is_lt(self, rhs: Self) -> bool {
self.compare(rhs).is_lt()
}
+ /// Less than or equal to operation
#[inline]
fn is_le(self, rhs: Self) -> bool {
self.compare(rhs).is_le()
}
+ /// Greater than operation
#[inline]
fn is_gt(self, rhs: Self) -> bool {
self.compare(rhs).is_gt()
}
+ /// Greater than or equal to operation
#[inline]
fn is_ge(self, rhs: Self) -> bool {
self.compare(rhs).is_ge()
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index 31dde3a3d..83af9760d 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -91,7 +91,7 @@ impl BooleanArray {
self.data.is_empty()
}
- // Returns a new boolean array builder
+ /// Returns a new boolean array builder
pub fn builder(capacity: usize) -> BooleanBuilder {
BooleanBuilder::with_capacity(capacity)
}
diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index 54699749f..204a36c32 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -29,7 +29,9 @@ use std::any::Any;
/// trait declaring an offset size, relevant for i32 vs i64 array types.
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
+ /// True for 64 bit offset size and false for 32 bit offset size
const IS_LARGE: bool;
+ /// Prefix for the offset size
const PREFIX: &'static str;
}
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index 307753a71..5fc44d896 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -382,6 +382,7 @@ impl<'a, T: Array> Array for &'a T {
/// The value at null indexes is unspecified, and implementations must not rely on a specific
/// value such as [`Default::default`] being returned, however, it must not be undefined
pub trait ArrayAccessor: Array {
+ /// The Arrow type of the element being accessed.
type Item: Send + Sync;
/// Returns the element at index `i`
diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index bd68b9698..42d183238 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -165,21 +165,48 @@ pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>;
/// A primitive array where each element is of type `TimestampNanosecondType.`
/// See examples for [`TimestampSecondArray.`](crate::array::TimestampSecondArray)
pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>;
+
+// TODO: give examples for the below types
+
+/// A primitive array where each element is of 32-bit date type.
pub type Date32Array = PrimitiveArray<Date32Type>;
+/// A primitive array where each element is of 64-bit date type.
pub type Date64Array = PrimitiveArray<Date64Type>;
+
+/// An array where each element is of 32-bit type representing time elapsed in seconds
+/// since midnight.
pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
+/// An array where each element is of 32-bit type representing time elapsed in milliseconds
+/// since midnight.
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
+/// An array where each element is of 64-bit type representing time elapsed in microseconds
+/// since midnight.
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
+/// An array where each element is of 64-bit type representing time elapsed in nanoseconds
+/// since midnight.
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
+
+/// An array where each element is a “calendar” interval in months.
pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
+/// An array where each element is a “calendar” interval in days and milliseconds.
pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
+/// An array where each element is a “calendar” interval in months, days, and nanoseconds.
pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>;
+
+/// An array where each element is an elapsed time type in seconds.
pub type DurationSecondArray = PrimitiveArray<DurationSecondType>;
+/// An array where each element is an elapsed time type in milliseconds.
pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>;
+/// An array where each element is an elapsed time type in microseconds.
pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
+/// An array where each element is an elapsed time type in nanoseconds.
pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>;
+/// An array where each element is a 128-bit decimal with precision in [1, 38] and
+/// scale in [-38, 38].
pub type Decimal128Array = PrimitiveArray<Decimal128Type>;
+/// An array where each element is a 256-bit decimal with precision in [1, 76] and
+/// scale in [-76, 76].
pub type Decimal256Array = PrimitiveArray<Decimal256Type>;
/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
@@ -256,7 +283,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
}
}
- // Returns a new primitive array builder
+ /// Returns a new primitive array builder
pub fn builder(capacity: usize) -> PrimitiveBuilder<T> {
PrimitiveBuilder::<T>::with_capacity(capacity)
}
@@ -749,6 +776,7 @@ impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> {
/// the type can be collected to `PrimitiveArray`.
#[derive(Debug)]
pub struct NativeAdapter<T: ArrowPrimitiveType> {
+ /// Corresponding Rust native type if available
pub native: Option<T::Native>,
}
diff --git a/arrow-array/src/builder/boolean_buffer_builder.rs b/arrow-array/src/builder/boolean_buffer_builder.rs
index 2ab01ccfe..4f8638ee7 100644
--- a/arrow-array/src/builder/boolean_buffer_builder.rs
+++ b/arrow-array/src/builder/boolean_buffer_builder.rs
@@ -19,6 +19,7 @@ use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::bit_mask;
use std::ops::Range;
+/// A builder for creating a boolean [`Buffer`]
#[derive(Debug)]
pub struct BooleanBufferBuilder {
buffer: MutableBuffer,
@@ -26,6 +27,7 @@ pub struct BooleanBufferBuilder {
}
impl BooleanBufferBuilder {
+ /// Creates a new `BooleanBufferBuilder`
#[inline]
pub fn new(capacity: usize) -> Self {
let byte_capacity = bit_util::ceil(capacity, 8);
@@ -33,16 +35,19 @@ impl BooleanBufferBuilder {
Self { buffer, len: 0 }
}
+ /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
assert!(len <= buffer.len() * 8);
Self { buffer, len }
}
+ /// Returns the length of the buffer
#[inline]
pub fn len(&self) -> usize {
self.len
}
+ /// Sets a bit in the buffer at `index`
#[inline]
pub fn set_bit(&mut self, index: usize, v: bool) {
if v {
@@ -52,21 +57,25 @@ impl BooleanBufferBuilder {
}
}
+ /// Gets a bit in the buffer at `index`
#[inline]
pub fn get_bit(&self, index: usize) -> bool {
bit_util::get_bit(self.buffer.as_slice(), index)
}
+ /// Returns true if empty
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
+ /// Returns the capacity of the buffer
#[inline]
pub fn capacity(&self) -> usize {
self.buffer.capacity() * 8
}
+ /// Advances the buffer by `additional` bits
#[inline]
pub fn advance(&mut self, additional: usize) {
let new_len = self.len + additional;
@@ -99,6 +108,7 @@ impl BooleanBufferBuilder {
self.len = len;
}
+ /// Appends a boolean `v` into the buffer
#[inline]
pub fn append(&mut self, v: bool) {
self.advance(1);
@@ -107,6 +117,7 @@ impl BooleanBufferBuilder {
}
}
+ /// Appends n `additional` bits of value `v` into the buffer
#[inline]
pub fn append_n(&mut self, additional: usize, v: bool) {
self.advance(additional);
@@ -118,6 +129,7 @@ impl BooleanBufferBuilder {
}
}
+ /// Appends a slice of booleans into the buffer
#[inline]
pub fn append_slice(&mut self, slice: &[bool]) {
let additional = slice.len();
@@ -156,6 +168,7 @@ impl BooleanBufferBuilder {
self.buffer.as_slice()
}
+ /// Creates a [`Buffer`]
#[inline]
pub fn finish(&mut self) -> Buffer {
let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
diff --git a/arrow-array/src/builder/buffer_builder.rs b/arrow-array/src/builder/buffer_builder.rs
index d3146366d..d4eed0de9 100644
--- a/arrow-array/src/builder/buffer_builder.rs
+++ b/arrow-array/src/builder/buffer_builder.rs
@@ -21,47 +21,78 @@ use std::marker::PhantomData;
use crate::types::*;
+/// Buffer builder for signed 8-bit integer type.
pub type Int8BufferBuilder = BufferBuilder<i8>;
+/// Buffer builder for signed 16-bit integer type.
pub type Int16BufferBuilder = BufferBuilder<i16>;
+/// Buffer builder for signed 32-bit integer type.
pub type Int32BufferBuilder = BufferBuilder<i32>;
+/// Buffer builder for signed 64-bit integer type.
pub type Int64BufferBuilder = BufferBuilder<i64>;
+/// Buffer builder for unsigned 8-bit integer type.
pub type UInt8BufferBuilder = BufferBuilder<u8>;
+/// Buffer builder for unsigned 16-bit integer type.
pub type UInt16BufferBuilder = BufferBuilder<u16>;
+/// Buffer builder for unsigned 32-bit integer type.
pub type UInt32BufferBuilder = BufferBuilder<u32>;
+/// Buffer builder for unsigned 64-bit integer type.
pub type UInt64BufferBuilder = BufferBuilder<u64>;
+/// Buffer builder for 32-bit floating point type.
pub type Float32BufferBuilder = BufferBuilder<f32>;
+/// Buffer builder for 64-bit floating point type.
pub type Float64BufferBuilder = BufferBuilder<f64>;
+/// Buffer builder for timestamp type of second unit.
pub type TimestampSecondBufferBuilder =
BufferBuilder<<TimestampSecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for timestamp type of millisecond unit.
pub type TimestampMillisecondBufferBuilder =
BufferBuilder<<TimestampMillisecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for timestamp type of microsecond unit.
pub type TimestampMicrosecondBufferBuilder =
BufferBuilder<<TimestampMicrosecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for timestamp type of nanosecond unit.
pub type TimestampNanosecondBufferBuilder =
BufferBuilder<<TimestampNanosecondType as ArrowPrimitiveType>::Native>;
+
+/// Buffer builder for 32-bit date type.
pub type Date32BufferBuilder = BufferBuilder<<Date32Type as ArrowPrimitiveType>::Native>;
+/// Buffer builder for 64-bit date type.
pub type Date64BufferBuilder = BufferBuilder<<Date64Type as ArrowPrimitiveType>::Native>;
+
+/// Buffer builder for 32-bit elapsed time since midnight of second unit.
pub type Time32SecondBufferBuilder =
BufferBuilder<<Time32SecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for 32-bit elapsed time since midnight of millisecond unit.
pub type Time32MillisecondBufferBuilder =
BufferBuilder<<Time32MillisecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for 64-bit elapsed time since midnight of microsecond unit.
pub type Time64MicrosecondBufferBuilder =
BufferBuilder<<Time64MicrosecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for 64-bit elapsed time since midnight of nanosecond unit.
pub type Time64NanosecondBufferBuilder =
BufferBuilder<<Time64NanosecondType as ArrowPrimitiveType>::Native>;
+
+/// Buffer builder for “calendar” interval in months.
pub type IntervalYearMonthBufferBuilder =
BufferBuilder<<IntervalYearMonthType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for “calendar” interval in days and milliseconds.
pub type IntervalDayTimeBufferBuilder =
BufferBuilder<<IntervalDayTimeType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for “calendar” interval in months, days, and nanoseconds.
pub type IntervalMonthDayNanoBufferBuilder =
BufferBuilder<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native>;
+
+/// Buffer builder for elapsed time of second unit.
pub type DurationSecondBufferBuilder =
BufferBuilder<<DurationSecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for elapsed time of milliseconds unit.
pub type DurationMillisecondBufferBuilder =
BufferBuilder<<DurationMillisecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for elapsed time of microseconds unit.
pub type DurationMicrosecondBufferBuilder =
BufferBuilder<<DurationMicrosecondType as ArrowPrimitiveType>::Native>;
+/// Buffer builder for elapsed time of nanoseconds unit.
pub type DurationNanosecondBufferBuilder =
BufferBuilder<<DurationNanosecondType as ArrowPrimitiveType>::Native>;
@@ -124,6 +155,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
}
}
+ /// Creates a new builder from a [`MutableBuffer`]
pub fn new_from_buffer(buffer: MutableBuffer) -> Self {
let buffer_len = buffer.len();
Self {
diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs
index e9581922c..4c8225adf 100644
--- a/arrow-array/src/builder/fixed_size_binary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_builder.rs
@@ -24,6 +24,22 @@ use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::sync::Arc;
+/// A fixed size binary array builder
+/// ```
+/// use arrow_array::builder::FixedSizeBinaryBuilder;
+/// use arrow_array::Array;
+///
+/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
+/// // [b"hello", null, b"arrow"]
+/// builder.append_value(b"hello").unwrap();
+/// builder.append_null();
+/// builder.append_value(b"arrow").unwrap();
+///
+/// let array = builder.finish();
+/// assert_eq!(array.value(0), b"hello");
+/// assert!(array.is_null(1));
+/// assert_eq!(array.value(2), b"arrow");
+/// ```
#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
values_builder: UInt8BufferBuilder,
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs
index 516c22925..bc4ce466a 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -25,6 +25,44 @@ use std::any::Any;
use std::sync::Arc;
/// Array builder for [`FixedSizeListArray`]
+/// ```
+/// use arrow_array::{builder::{Int32Builder, FixedSizeListBuilder}, Array, Int32Array};
+/// let values_builder = Int32Builder::new();
+/// let mut builder = FixedSizeListBuilder::new(values_builder, 3);
+///
+/// // [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
+/// builder.values().append_value(0);
+/// builder.values().append_value(1);
+/// builder.values().append_value(2);
+/// builder.append(true);
+/// builder.values().append_null();
+/// builder.values().append_null();
+/// builder.values().append_null();
+/// builder.append(false);
+/// builder.values().append_value(3);
+/// builder.values().append_null();
+/// builder.values().append_value(5);
+/// builder.append(true);
+/// builder.values().append_value(6);
+/// builder.values().append_value(7);
+/// builder.values().append_null();
+/// builder.append(true);
+/// let list_array = builder.finish();
+/// assert_eq!(
+/// *list_array.value(0),
+/// Int32Array::from(vec![Some(0), Some(1), Some(2)])
+/// );
+/// assert!(list_array.is_null(1));
+/// assert_eq!(
+/// *list_array.value(2),
+/// Int32Array::from(vec![Some(3), None, Some(5)])
+/// );
+/// assert_eq!(
+/// *list_array.value(3),
+/// Int32Array::from(vec![Some(6), Some(7), None])
+/// )
+/// ```
+///
#[derive(Debug)]
pub struct FixedSizeListBuilder<T: ArrayBuilder> {
null_buffer_builder: NullBufferBuilder,
@@ -104,6 +142,7 @@ where
&mut self.values_builder
}
+ /// Returns the length of the list
pub fn value_length(&self) -> i32 {
self.list_len
}
diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs
index 5602f8863..737b4fa72 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -24,6 +24,43 @@ use arrow_schema::{ArrowError, DataType, Field};
use std::any::Any;
use std::sync::Arc;
+/// Array builder for [`MapArray`]
+/// ```
+/// use arrow_array::builder::{MapBuilder, Int32Builder, StringBuilder};
+/// use arrow_array::{StringArray, Int32Array};
+/// use std::sync::Arc;
+///
+/// let string_builder = StringBuilder::new();
+/// let int_builder = Int32Builder::with_capacity(4);
+///
+/// let mut builder = MapBuilder::new(None, string_builder, int_builder);
+///
+/// let string_builder = builder.keys();
+/// string_builder.append_value("joe");
+/// string_builder.append_null();
+/// string_builder.append_null();
+/// string_builder.append_value("mark");
+///
+/// let int_builder = builder.values();
+/// int_builder.append_value(1);
+/// int_builder.append_value(2);
+/// int_builder.append_null();
+/// int_builder.append_value(4);
+///
+/// builder.append(true).unwrap();
+/// builder.append(false).unwrap();
+/// builder.append(true).unwrap();
+///
+/// let arr = builder.finish();
+/// assert_eq!(
+/// *arr.values(),
+/// Int32Array::from(vec![Some(1), Some(2), None, Some(4)])
+/// );
+/// assert_eq!(
+/// *arr.keys(),
+/// StringArray::from(vec![Some("joe"), None, None, Some("mark")])
+/// );
+/// ```
#[derive(Debug)]
pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
offsets_builder: BufferBuilder<i32>,
@@ -33,10 +70,14 @@ pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
value_builder: V,
}
+/// Contains details of the mapping
#[derive(Debug, Clone)]
pub struct MapFieldNames {
+ /// [`Field`] name for map entries
pub entry: String,
+ /// [`Field`] name for map key
pub key: String,
+ /// [`Field`] name for map value
pub value: String,
}
@@ -52,6 +93,7 @@ impl Default for MapFieldNames {
#[allow(dead_code)]
impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
+ /// Creates a new `MapBuilder`
pub fn new(
field_names: Option<MapFieldNames>,
key_builder: K,
@@ -61,6 +103,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
Self::with_capacity(field_names, key_builder, value_builder, capacity)
}
+ /// Creates a new `MapBuilder` with capacity
pub fn with_capacity(
field_names: Option<MapFieldNames>,
key_builder: K,
@@ -79,10 +122,12 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
}
}
+ /// Returns the key array builder of the map
pub fn keys(&mut self) -> &mut K {
&mut self.key_builder
}
+ /// Returns the value array builder of the map
pub fn values(&mut self) -> &mut V {
&mut self.value_builder
}
@@ -104,6 +149,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
Ok(())
}
+ /// Builds the [`MapArray`]
pub fn finish(&mut self) -> MapArray {
let len = self.len();
@@ -144,6 +190,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
MapArray::from(array_data)
}
+ /// Builds the [`MapArray`] without resetting the builder.
pub fn finish_cloned(&self) -> MapArray {
let len = self.len();
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index eaf824397..3486e396b 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -128,11 +128,17 @@ pub trait ArrayBuilder: Any + Send {
fn into_box_any(self: Box<Self>) -> Box<dyn Any>;
}
+/// A list array builder with i32 offsets
pub type ListBuilder<T> = GenericListBuilder<i32, T>;
+/// A list array builder with i64 offsets
pub type LargeListBuilder<T> = GenericListBuilder<i64, T>;
+/// A binary array builder with i32 offsets
pub type BinaryBuilder = GenericBinaryBuilder<i32>;
+/// A binary array builder with i64 offsets
pub type LargeBinaryBuilder = GenericBinaryBuilder<i64>;
+/// A string array builder with i32 offsets
pub type StringBuilder = GenericStringBuilder<i32>;
+/// A string array builder with i64 offsets
pub type LargeStringBuilder = GenericStringBuilder<i64>;
diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs
index 7a1fbafc7..ef420dcbc 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -24,36 +24,69 @@ use arrow_data::ArrayData;
use std::any::Any;
use std::sync::Arc;
+/// A signed 8-bit integer array builder.
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
+/// A signed 16-bit integer array builder.
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
+/// A signed 32-bit integer array builder.
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
+/// A signed 64-bit integer array builder.
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
+/// An unsigned 8-bit integer array builder.
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
+/// An unsigned 16-bit integer array builder.
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
+/// An unsigned 32-bit integer array builder.
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
+/// An unsigned 64-bit integer array builder.
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
+/// A 32-bit floating point array builder.
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
+/// A 64-bit floating point array builder.
pub type Float64Builder = PrimitiveBuilder<Float64Type>;
+/// A timestamp second array builder.
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
+/// A timestamp millisecond array builder.
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
+/// A timestamp microsecond array builder.
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
+/// A timestamp nanosecond array builder.
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;
+
+/// A 32-bit date array builder.
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
+/// A 64-bit date array builder.
pub type Date64Builder = PrimitiveBuilder<Date64Type>;
+
+/// A 32-bit elapsed time in seconds array builder.
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
+/// A 32-bit elapsed time in milliseconds array builder.
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
+/// A 64-bit elapsed time in microseconds array builder.
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
+/// A 64-bit elapsed time in nanoseconds array builder.
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
+
+/// A “calendar” interval in months array builder.
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
+/// A “calendar” interval in days and milliseconds array builder.
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
+/// A “calendar” interval in months, days, and nanoseconds array builder.
pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;
+
+/// An elapsed time in seconds array builder.
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
+/// An elapsed time in milliseconds array builder.
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
+/// An elapsed time in microseconds array builder.
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
+/// An elapsed time in nanoseconds array builder.
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;
+/// A decimal 128 array builder
pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
+/// A decimal 256 array builder
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
/// Array builder for fixed-width primitive types
@@ -120,6 +153,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
}
}
+ /// Creates a new primitive array builder from buffers
pub fn new_from_buffer(
values_buffer: MutableBuffer,
null_buffer: Option<MutableBuffer>,
@@ -157,6 +191,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
self.values_builder.advance(1);
}
+ /// Appends `n` nulls into the builder
#[inline]
pub fn append_nulls(&mut self, n: usize) {
self.null_buffer_builder.append_n_nulls(n);
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs
index 5b8a72835..4640902d8 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -193,6 +193,7 @@ where
Ok(key)
}
+ /// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.keys_builder.append_null()
diff --git a/arrow-array/src/builder/string_dictionary_builder.rs b/arrow-array/src/builder/string_dictionary_builder.rs
index f44756b6b..878cfc727 100644
--- a/arrow-array/src/builder/string_dictionary_builder.rs
+++ b/arrow-array/src/builder/string_dictionary_builder.rs
@@ -270,6 +270,7 @@ where
Ok(key)
}
+ /// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
self.keys_builder.append_null()
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 98d0e1a1d..12bcaf094 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -174,6 +174,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
}
impl StructBuilder {
+ /// Creates a new `StructBuilder`
pub fn new(fields: Vec<Field>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
Self {
fields,
@@ -182,6 +183,7 @@ impl StructBuilder {
}
}
+ /// Creates a new `StructBuilder` from vector of [`Field`] with `capacity`
pub fn from_fields(fields: Vec<Field>, capacity: usize) -> Self {
let mut builders = Vec::with_capacity(fields.len());
for field in &fields {
diff --git a/arrow-array/src/iterator.rs b/arrow-array/src/iterator.rs
index 351f90bac..e7c5e8367 100644
--- a/arrow-array/src/iterator.rs
+++ b/arrow-array/src/iterator.rs
@@ -116,10 +116,15 @@ impl<T: ArrayAccessor> ExactSizeIterator for ArrayIter<T> {}
/// an iterator that returns Some(T) or None, that can be used on any PrimitiveArray
pub type PrimitiveIter<'a, T> = ArrayIter<&'a PrimitiveArray<T>>;
+/// an iterator that returns Some(T) or None, that can be used on any BooleanArray
pub type BooleanIter<'a> = ArrayIter<&'a BooleanArray>;
+/// an iterator that returns Some(T) or None, that can be used on any Utf8Array
pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
+/// an iterator that returns Some(T) or None, that can be used on any BinaryArray
pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
+/// an iterator that returns Some(T) or None, that can be used on any FixedSizeBinaryArray
pub type FixedSizeBinaryIter<'a> = ArrayIter<&'a FixedSizeBinaryArray>;
+/// an iterator that returns Some(T) or None, that can be used on any ListArray
pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;
#[cfg(test)]
diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs
index 15267d308..5fcd1f33d 100644
--- a/arrow-array/src/lib.rs
+++ b/arrow-array/src/lib.rs
@@ -158,6 +158,9 @@
//! [`json`]: https://docs.rs/arrow/latest/arrow/json/index.html
//! [`csv`]: https://docs.rs/arrow/latest/arrow/csv/index.html
+#![deny(rustdoc::broken_intra_doc_links)]
+#![warn(missing_docs)]
+
pub mod array;
pub use array::*;
diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 6f2385fa9..ea0eb3853 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -417,6 +417,7 @@ pub struct RecordBatchOptions {
}
impl RecordBatchOptions {
+ /// Creates a new `RecordBatchOptions`
pub fn new() -> Self {
Self {
match_field_names: true,
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 13194d61f..0646a7f29 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -34,16 +34,19 @@ use std::ops::{Add, Sub};
// BooleanType is special: its bit-width is not the size of the primitive type, and its `index`
// operation assumes bit-packing.
+/// A boolean datatype
#[derive(Debug)]
pub struct BooleanType {}
impl BooleanType {
+ /// The Arrow [`DataType`] that this type represents
pub const DATA_TYPE: DataType = DataType::Boolean;
}
macro_rules! make_type {
- ($name:ident, $native_ty:ty, $data_ty:expr) => {
+ ($name:ident, $native_ty:ty, $data_ty:expr, $doc_string: literal) => {
#[derive(Debug)]
+ #[doc = $doc_string]
pub struct $name {}
impl ArrowPrimitiveType for $name {
@@ -53,89 +56,168 @@ macro_rules! make_type {
};
}
-make_type!(Int8Type, i8, DataType::Int8);
-make_type!(Int16Type, i16, DataType::Int16);
-make_type!(Int32Type, i32, DataType::Int32);
-make_type!(Int64Type, i64, DataType::Int64);
-make_type!(UInt8Type, u8, DataType::UInt8);
-make_type!(UInt16Type, u16, DataType::UInt16);
-make_type!(UInt32Type, u32, DataType::UInt32);
-make_type!(UInt64Type, u64, DataType::UInt64);
-make_type!(Float16Type, f16, DataType::Float16);
-make_type!(Float32Type, f32, DataType::Float32);
-make_type!(Float64Type, f64, DataType::Float64);
+make_type!(Int8Type, i8, DataType::Int8, "A signed 8-bit integer type.");
+make_type!(
+ Int16Type,
+ i16,
+ DataType::Int16,
+ "A signed 16-bit integer type."
+);
+make_type!(
+ Int32Type,
+ i32,
+ DataType::Int32,
+ "A signed 32-bit integer type."
+);
+make_type!(
+ Int64Type,
+ i64,
+ DataType::Int64,
+ "A signed 64-bit integer type."
+);
+make_type!(
+ UInt8Type,
+ u8,
+ DataType::UInt8,
+ "An unsigned 8-bit integer type."
+);
+make_type!(
+ UInt16Type,
+ u16,
+ DataType::UInt16,
+ "An unsigned 16-bit integer type."
+);
+make_type!(
+ UInt32Type,
+ u32,
+ DataType::UInt32,
+ "An unsigned 32-bit integer type."
+);
+make_type!(
+ UInt64Type,
+ u64,
+ DataType::UInt64,
+ "An unsigned 64-bit integer type."
+);
+make_type!(
+ Float16Type,
+ f16,
+ DataType::Float16,
+ "A 16-bit floating point number type."
+);
+make_type!(
+ Float32Type,
+ f32,
+ DataType::Float32,
+ "A 32-bit floating point number type."
+);
+make_type!(
+ Float64Type,
+ f64,
+ DataType::Float64,
+ "A 64-bit floating point number type."
+);
make_type!(
TimestampSecondType,
i64,
- DataType::Timestamp(TimeUnit::Second, None)
+ DataType::Timestamp(TimeUnit::Second, None),
+ "A timestamp second type with an optional timezone."
);
make_type!(
TimestampMillisecondType,
i64,
- DataType::Timestamp(TimeUnit::Millisecond, None)
+ DataType::Timestamp(TimeUnit::Millisecond, None),
+ "A timestamp millisecond type with an optional timezone."
);
make_type!(
TimestampMicrosecondType,
i64,
- DataType::Timestamp(TimeUnit::Microsecond, None)
+ DataType::Timestamp(TimeUnit::Microsecond, None),
+ "A timestamp microsecond type with an optional timezone."
);
make_type!(
TimestampNanosecondType,
i64,
- DataType::Timestamp(TimeUnit::Nanosecond, None)
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ "A timestamp nanosecond type with an optional timezone."
+);
+make_type!(
+ Date32Type,
+ i32,
+ DataType::Date32,
+ "A 32-bit date type representing the elapsed time since UNIX epoch in days(32 bits)."
+);
+make_type!(
+ Date64Type,
+ i64,
+ DataType::Date64,
+ "A 64-bit date type representing the elapsed time since UNIX epoch in milliseconds (64 bits)."
+);
+make_type!(
+ Time32SecondType,
+ i32,
+ DataType::Time32(TimeUnit::Second),
+ "A 32-bit time type representing the elapsed time since midnight in seconds."
);
-make_type!(Date32Type, i32, DataType::Date32);
-make_type!(Date64Type, i64, DataType::Date64);
-make_type!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second));
make_type!(
Time32MillisecondType,
i32,
- DataType::Time32(TimeUnit::Millisecond)
+ DataType::Time32(TimeUnit::Millisecond),
+ "A 32-bit time type representing the elapsed time since midnight in milliseconds."
);
make_type!(
Time64MicrosecondType,
i64,
- DataType::Time64(TimeUnit::Microsecond)
+ DataType::Time64(TimeUnit::Microsecond),
+ "A 64-bit time type representing the elapsed time since midnight in microseconds."
);
make_type!(
Time64NanosecondType,
i64,
- DataType::Time64(TimeUnit::Nanosecond)
+ DataType::Time64(TimeUnit::Nanosecond),
+ "A 64-bit time type representing the elapsed time since midnight in nanoseconds."
);
make_type!(
IntervalYearMonthType,
i32,
- DataType::Interval(IntervalUnit::YearMonth)
+ DataType::Interval(IntervalUnit::YearMonth),
+ "A “calendar” interval type in months."
);
make_type!(
IntervalDayTimeType,
i64,
- DataType::Interval(IntervalUnit::DayTime)
+ DataType::Interval(IntervalUnit::DayTime),
+ "A “calendar” interval type in days and milliseconds."
);
make_type!(
IntervalMonthDayNanoType,
i128,
- DataType::Interval(IntervalUnit::MonthDayNano)
+ DataType::Interval(IntervalUnit::MonthDayNano),
+ "A “calendar” interval type in months, days, and nanoseconds."
);
make_type!(
DurationSecondType,
i64,
- DataType::Duration(TimeUnit::Second)
+ DataType::Duration(TimeUnit::Second),
+ "An elapsed time type in seconds."
);
make_type!(
DurationMillisecondType,
i64,
- DataType::Duration(TimeUnit::Millisecond)
+ DataType::Duration(TimeUnit::Millisecond),
+ "An elapsed time type in milliseconds."
);
make_type!(
DurationMicrosecondType,
i64,
- DataType::Duration(TimeUnit::Microsecond)
+ DataType::Duration(TimeUnit::Microsecond),
+ "An elapsed time type in microseconds."
);
make_type!(
DurationNanosecondType,
i64,
- DataType::Duration(TimeUnit::Nanosecond)
+ DataType::Duration(TimeUnit::Nanosecond),
+ "An elapsed time type in nanoseconds."
);
/// A subtype of primitive type that represents legal dictionary keys.
@@ -489,10 +571,15 @@ mod decimal {
pub trait DecimalType:
'static + Send + Sync + ArrowPrimitiveType + decimal::DecimalTypeSealed
{
+ /// Width of the type in bytes
const BYTE_LENGTH: usize;
+ /// Maximum number of significant digits
const MAX_PRECISION: u8;
+ /// Maximum number of digits after the decimal point (note the scale can be negative)
const MAX_SCALE: i8;
+ /// Function that creates its [`DataType`]
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
+ /// Default values for [`DataType`]
const DEFAULT_TYPE: DataType;
/// "Decimal128" or "Decimal256", for use in error messages
@@ -621,10 +708,15 @@ pub(crate) mod bytes {
///
/// See [Variable Size Binary Layout](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout)
pub trait ByteArrayType: 'static + Send + Sync + bytes::ByteArrayTypeSealed {
+ /// Type of offset, i.e. `i32` or `i64`
type Offset: OffsetSizeTrait;
+ /// Type for representing its equivalent rust type, i.e.
+ /// Utf8Array will have native type as `&str`
+ /// BinaryArray will have native type as `[u8]`
type Native: bytes::ByteArrayNativeType + AsRef<[u8]> + ?Sized;
/// "Binary" or "String", for use in error messages
const PREFIX: &'static str;
+ /// Datatype of array elements
const DATA_TYPE: DataType;
}
@@ -645,7 +737,9 @@ impl<O: OffsetSizeTrait> ByteArrayType for GenericStringType<O> {
};
}
+/// An arrow utf8 array with i32 offsets
pub type Utf8Type = GenericStringType<i32>;
+/// An arrow utf8 array with i64 offsets
pub type LargeUtf8Type = GenericStringType<i64>;
/// [`ByteArrayType`] for binary arrays
@@ -665,7 +759,9 @@ impl<O: OffsetSizeTrait> ByteArrayType for GenericBinaryType<O> {
};
}
+/// An arrow binary array with i32 offsets
pub type BinaryType = GenericBinaryType<i32>;
+/// An arrow binary array with i64 offsets
pub type LargeBinaryType = GenericBinaryType<i64>;
#[cfg(test)]
diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs
index 21f96d90a..0f1c0064f 100644
--- a/arrow-json/src/lib.rs
+++ b/arrow-json/src/lib.rs
@@ -19,6 +19,9 @@
//! line-delimited records. See the module level documentation for the
//! [`reader`] and [`writer`] for usage examples.
+#![deny(rustdoc::broken_intra_doc_links)]
+#![warn(missing_docs)]
+
pub mod reader;
pub mod writer;
@@ -30,6 +33,7 @@ use serde_json::{Number, Value};
/// Trait declaring any type that is serializable to JSON. This includes all primitive types (bool, i32, etc.).
pub trait JsonSerializable: 'static {
+ /// Converts self into a JSON value if it is possible
fn into_json_value(self) -> Option<Value>;
}
diff --git a/arrow-json/src/reader.rs b/arrow-json/src/reader.rs
index 646d9c0d1..0d3148c5a 100644
--- a/arrow-json/src/reader.rs
+++ b/arrow-json/src/reader.rs
@@ -198,6 +198,7 @@ pub struct ValueIter<'a, R: Read> {
}
impl<'a, R: Read> ValueIter<'a, R> {
+ /// Creates a new `ValueIter`
pub fn new(reader: &'a mut BufReader<R>, max_read_records: Option<usize>) -> Self {
Self {
reader,
@@ -613,6 +614,7 @@ impl Default for DecoderOptions {
}
impl DecoderOptions {
+ /// Creates a new `DecoderOptions`
pub fn new() -> Self {
Default::default()
}