You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/13 06:14:46 UTC
[arrow-rs] branch master updated: Minor: Add diagrams and documentation to row format (#3094)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new c7210ce2b Minor: Add diagrams and documentation to row format (#3094)
c7210ce2b is described below
commit c7210ce2b5190eba83afe42d078b5aac0cfbd7cf
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sun Nov 13 01:14:42 2022 -0500
Minor: Add diagrams and documentation to row format (#3094)
Co-authored-by: Raphael Taylor-Davies <r....@googlemail.com>
---
arrow/src/row/mod.rs | 191 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 162 insertions(+), 29 deletions(-)
diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs
index 4dd2a33c0..1d0a58d95 100644
--- a/arrow/src/row/mod.rs
+++ b/arrow/src/row/mod.rs
@@ -15,15 +15,42 @@
// specific language governing permissions and limitations
// under the License.
-//! A comparable row-oriented representation of a collection of [`Array`]
+//! A comparable row-oriented representation of a collection of [`Array`].
//!
-//! As [`Row`] are [normalized for sorting], they can be very efficiently [compared](PartialOrd),
+//! [`Row`]s are [normalized for sorting], and can be very efficiently [compared],
//! using [`memcmp`] under the hood, or used in [non-comparison sorts] such as [radix sort]. This
//! makes the row format ideal for implementing efficient multi-column sorting,
//! grouping, aggregation, windowing and more.
//!
-//! _Comparing [`Rows`] generated by different [`RowConverter`] is not guaranteed to
-//! yield a meaningful ordering_
+//! The format is described in more detail on [`RowConverter`] as well as the
+//! [Fast and Memory Efficient Multi-Column Sorts in Apache Arrow Rust](https://arrow.apache.org/blog/2022/11/07/multi-column-sorts-in-arrow-rust-part-1/) article.
+//!
+//! _[`Rows`] generated by different [`RowConverter`] are arbitrarily
+//! ordered. The same [`RowConverter`] must be used for the comparison
+//! to be well defined._
+//!
+//! For example, given three input [`Array`]s, this code creates byte
+//! sequences that [compare] the same as when using [`lexsort`].
+//!
+//! ```text
+//! ┌─────┐ ┌─────┐ ┌─────┐
+//! │ │ │ │ │ │
+//! ├─────┤ ┌ ┼─────┼ ─ ┼─────┼ ┐ ┏━━━━━━━━━━━━━┓
+//! │ │ │ │ │ │ ─────────────▶┃ ┃
+//! ├─────┤ └ ┼─────┼ ─ ┼─────┼ ┘ ┗━━━━━━━━━━━━━┛
+//! │ │ │ │ │ │
+//! └─────┘ └─────┘ └─────┘
+//! ...
+//! ┌─────┐ ┌ ┬─────┬ ─ ┬─────┬ ┐ ┏━━━━━━━━┓
+//! │ │ │ │ │ │ ─────────────▶┃ ┃
+//! └─────┘ └ ┴─────┴ ─ ┴─────┴ ┘ ┗━━━━━━━━┛
+//! UInt64 Utf8 F64
+//!
+//! Input Arrays Row Format
+//! (Columns)
+//! ```
+//!
+//! # Basic Example
//! ```
//! # use std::sync::Arc;
//! # use arrow::row::{RowConverter, SortField};
@@ -73,7 +100,9 @@
//! assert_eq!(&c2_values, &["a", "f", "c", "e"]);
//! ```
//!
-//! It can also be used to implement a fast multi-column / lexicographic sort
+//! # Lexsort
+//!
+//! The row format can also be used to implement a fast multi-column / lexicographic sort
//!
//! ```
//! # use arrow::row::{RowConverter, SortField};
@@ -95,6 +124,9 @@
//! [radix sort]:[https://en.wikipedia.org/wiki/Radix_sort]
//! [normalized for sorting]:[https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.83.1080&rep=rep1&type=pdf]
//! [`memcmp`]:[https://www.man7.org/linux/man-pages/man3/memcmp.3.html]
+//! [`lexsort`]: crate::compute::kernels::sort::lexsort
+//! [compared]: PartialOrd
+//! [compare]: PartialOrd
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
@@ -119,38 +151,75 @@ mod fixed;
mod interner;
mod variable;
-/// Converts [`ArrayRef`] columns into a row-oriented format.
+/// Converts [`ArrayRef`] columns into a [row-oriented](self) format.
+///
+/// *Note: The encoding of the row format may change from release to release.*
+///
+/// ## Overview
///
-/// # Format
+/// The row format is a variable length byte sequence created by
+/// concatenating the encoded form of each column. The encoding for
+/// each column depends on its datatype (and sort options).
///
-/// The encoding of the row format should not be considered stable, but is documented here
-/// for reference.
+/// The encoding is carefully designed in such a way that escaping is
+/// unnecessary: it is never ambiguous as to whether a byte is part of
+/// a sentinel (e.g. null) or a value.
///
/// ## Unsigned Integer Encoding
///
/// A null integer is encoded as a `0_u8`, followed by a zero-ed number of bytes corresponding
-/// to the integer's length
+/// to the integer's length.
///
/// A valid integer is encoded as `1_u8`, followed by the big-endian representation of the
-/// integer
+/// integer.
+///
+/// ```text
+/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐
+/// 3 │03│00│00│00│ │01│00│00│00│03│
+/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘
+/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐
+/// 258 │02│01│00│00│ │01│00│00│01│02│
+/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘
+/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐
+/// 23423 │7F│5B│00│00│ │01│00│00│5B│7F│
+/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘
+/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐
+/// NULL │??│??│??│??│ │00│00│00│00│00│
+/// └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘
+///
+/// 32-bit (4 bytes) Row Format
+/// Value Little Endian
+/// ```
///
/// ## Signed Integer Encoding
///
/// Signed integers have their most significant sign bit flipped, and are then encoded in the
-/// same manner as an unsigned integer
+/// same manner as an unsigned integer.
+///
+/// ```text
+/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐
+/// 5 │05│00│00│00│ │05│00│00│80│ │01│80│00│00│05│
+/// └──┴──┴──┴──┘ └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘
+/// ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┐ ┌──┬──┬──┬──┬──┐
+/// -5 │FB│FF│FF│FF│ │FB│FF│FF│7F│ │01│7F│FF│FF│FB│
+/// └──┴──┴──┴──┘ └──┴──┴──┴──┘ └──┴──┴──┴──┴──┘
+///
+/// Value 32-bit (4 bytes) High bit flipped Row Format
+/// Little Endian
+/// ```
///
/// ## Float Encoding
///
/// Floats are converted from IEEE 754 representation to a signed integer representation
/// by flipping all bar the sign bit if they are negative.
///
-/// They are then encoded in the same manner as a signed integer
+/// They are then encoded in the same manner as a signed integer.
///
-/// ## Variable Length Bytes Encoding
+/// ## Variable Length Bytes (including Strings) Encoding
///
-/// A null is encoded as a `0_u8`
+/// A null is encoded as a `0_u8`.
///
-/// An empty byte array is encoded as `1_u8`
+/// An empty byte array is encoded as `1_u8`.
///
/// A non-null, non-empty byte array is encoded as `2_u8` followed by the byte array
/// encoded using a block based scheme described below.
@@ -158,9 +227,38 @@ mod variable;
/// The byte array is broken up into 32-byte blocks, each block is written in turn
/// to the output, followed by `0xFF_u8`. The final block is padded to 32-bytes
/// with `0_u8` and written to the output, followed by the un-padded length in bytes
-/// of this final block as a `u8`
+/// of this final block as a `u8`.
+///
+/// Note the following example encodings use a block size of 4 bytes,
+/// as opposed to 32 bytes for brevity:
+///
+/// ```text
+/// ┌───┬───┬───┬───┬───┬───┐
+/// "MEEP" │02 │'M'│'E'│'E'│'P'│04 │
+/// └───┴───┴───┴───┴───┴───┘
+///
+/// ┌───┐
+/// "" │01 |
+/// └───┘
+///
+/// NULL ┌───┐
+/// │00 │
+/// └───┘
+///
+/// "Defenestration" ┌───┬───┬───┬───┬───┬───┐
+/// │02 │'D'│'e'│'f'│'e'│FF │
+/// └───┼───┼───┼───┼───┼───┤
+/// │'n'│'e'│'s'│'t'│FF │
+/// ├───┼───┼───┼───┼───┤
+/// │'r'│'a'│'t'│'r'│FF │
+/// ├───┼───┼───┼───┼───┤
+/// │'a'│'t'│'i'│'o'│FF │
+/// ├───┼───┼───┼───┼───┤
+/// │'n'│00 │00 │00 │01 │
+/// └───┴───┴───┴───┴───┘
+/// ```
///
-/// This is loosely inspired by [COBS] encoding, and chosen over more traditional
+/// This approach is loosely inspired by [COBS] encoding, and chosen over more traditional
/// [byte stuffing] as it is more amenable to vectorisation, in particular AVX-256.
///
/// ## Dictionary Encoding
@@ -170,15 +268,48 @@ mod variable;
/// the dictionary encoding, and encode the array values directly, however, this would lose
/// the benefits of dictionary encoding to reduce memory and CPU consumption.
///
-/// As such the [`RowConverter`] maintains an order-preserving dictionary encoding for each
-/// dictionary encoded column. As this is a variable-length encoding, new dictionary values
-/// can be added whilst preserving the sort order.
+/// As such the [`RowConverter`] creates an order-preserving mapping
+/// for each dictionary encoded column, which allows new dictionary
+/// values to be added whilst preserving the sort order.
///
/// A null dictionary value is encoded as `0_u8`.
///
/// A non-null dictionary value is encoded as `1_u8` followed by a null-terminated byte array
/// key determined by the order-preserving dictionary encoding
///
+/// ```text
+/// ┌──────────┐ ┌─────┐
+/// │ "Bar" │ ───────────────▶│ 01 │
+/// └──────────┘ └─────┘
+/// ┌──────────┐ ┌─────┬─────┐
+/// │"Fabulous"│ ───────────────▶│ 01 │ 02 │
+/// └──────────┘ └─────┴─────┘
+/// ┌──────────┐ ┌─────┐
+/// │ "Soup" │ ───────────────▶│ 05 │
+/// └──────────┘ └─────┘
+/// ┌──────────┐ ┌─────┐
+/// │ "ZZ" │ ───────────────▶│ 07 │
+/// └──────────┘ └─────┘
+///
+/// Example Order Preserving Mapping
+/// ```
+/// Using the map above, the corresponding row format will be
+///
+/// ```text
+/// ┌─────┬─────┬─────┬─────┐
+/// "Fabulous" │ 01 │ 03 │ 05 │ 00 │
+/// └─────┴─────┴─────┴─────┘
+///
+/// ┌─────┬─────┬─────┐
+/// "ZZ" │ 01 │ 07 │ 00 │
+/// └─────┴─────┴─────┘
+///
+/// ┌─────┐
+/// NULL │ 00 │
+/// └─────┘
+///
+/// Input Row Format
+/// ```
/// # Ordering
///
/// ## Float Ordering
@@ -199,8 +330,8 @@ mod variable;
///
/// The order of a given column can be reversed by negating the encoded bytes of non-null values
///
-/// [COBS]:[https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing]
-/// [byte stuffing]:[https://en.wikipedia.org/wiki/High-Level_Data_Link_Control#Asynchronous_framing]
+/// [COBS]: https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing
+/// [byte stuffing]: https://en.wikipedia.org/wiki/High-Level_Data_Link_Control#Asynchronous_framing
#[derive(Debug)]
pub struct RowConverter {
fields: Arc<[SortField]>,
@@ -351,9 +482,9 @@ impl RowConverter {
}
}
-/// A row-oriented representation of arrow data, that is normalized for comparison
+/// A row-oriented representation of arrow data, that is normalized for comparison.
///
-/// See [`RowConverter`]
+/// See the [module level documentation](self) and [`RowConverter`] for more details.
#[derive(Debug)]
pub struct Rows {
/// Underlying row bytes
@@ -439,12 +570,14 @@ impl<'a> DoubleEndedIterator for RowsIter<'a> {
}
}
-/// A comparable representation of a row
+/// A comparable representation of a row.
///
-/// Two [`Row`] can be compared if they both belong to [`Rows`] returned by calls to
-/// [`RowConverter::convert_columns`] on the same [`RowConverter`]
+/// See the [module level documentation](self) for more details.
///
-/// Otherwise any ordering established by comparing the [`Row`] is arbitrary
+/// Two [`Row`] can only be compared if they both belong to [`Rows`]
+/// returned by calls to [`RowConverter::convert_columns`] on the same
+/// [`RowConverter`]. If different [`RowConverter`]s are used, any
+/// ordering established by comparing the [`Row`] is arbitrary.
#[derive(Debug, Copy, Clone)]
pub struct Row<'a> {
data: &'a [u8],