You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/13 06:14:46 UTC

[arrow-rs] branch master updated: Minor: Add diagrams and documentation to row format (#3094)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new c7210ce2b Minor: Add diagrams and documentation to row format (#3094)
c7210ce2b is described below

commit c7210ce2b5190eba83afe42d078b5aac0cfbd7cf
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Sun Nov 13 01:14:42 2022 -0500

    Minor: Add diagrams and documentation to row format (#3094)
    
    Co-authored-by: Raphael Taylor-Davies <r....@googlemail.com>
---
 arrow/src/row/mod.rs | 191 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 162 insertions(+), 29 deletions(-)

diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs
index 4dd2a33c0..1d0a58d95 100644
--- a/arrow/src/row/mod.rs
+++ b/arrow/src/row/mod.rs
@@ -15,15 +15,42 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! A comparable row-oriented representation of a collection of [`Array`]
+//! A comparable row-oriented representation of a collection of [`Array`].
 //!
-//! As [`Row`] are [normalized for sorting], they can be very efficiently [compared](PartialOrd),
+//! [`Row`]s are [normalized for sorting], and can be very efficiently [compared],
 //! using [`memcmp`] under the hood, or used in [non-comparison sorts] such as [radix sort]. This
 //! makes the row format ideal for implementing efficient multi-column sorting,
 //! grouping, aggregation, windowing and more.
 //!
-//! _Comparing [`Rows`] generated by different [`RowConverter`] is not guaranteed to
-//! yield a meaningful ordering_
+//! The format is described in more detail on [`RowConverter`] as well as the
+//! [Fast and Memory Efficient Multi-Column Sorts in Apache Arrow Rust](https://arrow.apache.org/blog/2022/11/07/multi-column-sorts-in-arrow-rust-part-1/) article.
+//!
+//! _[`Rows`] generated by different [`RowConverter`] are arbitrarily
+//! ordered. The same [`RowConverter`] must be used for the comparison
+//! to be well defined._
+//!
+//! For example, given three input [`Array`]s, this code creates byte
+//! sequences that [compare] the same as when using [`lexsort`].
+//!
+//! ```text
+//!    ┌─────┐   ┌─────┐   ┌─────┐
+//!    │     │   │     │   │     │
+//!    ├─────┤ ┌ ┼─────┼ ─ ┼─────┼ ┐              ┏━━━━━━━━━━━━━┓
+//!    │     │   │     │   │     │  ─────────────▶┃             ┃
+//!    ├─────┤ └ ┼─────┼ ─ ┼─────┼ ┘              ┗━━━━━━━━━━━━━┛
+//!    │     │   │     │   │     │
+//!    └─────┘   └─────┘   └─────┘
+//!                ...
+//!    ┌─────┐ ┌ ┬─────┬ ─ ┬─────┬ ┐              ┏━━━━━━━━┓
+//!    │     │   │     │   │     │  ─────────────▶┃        ┃
+//!    └─────┘ └ ┴─────┴ ─ ┴─────┴ ┘              ┗━━━━━━━━┛
+//!     UInt64      Utf8     F64
+//!
+//!           Input Arrays                          Row Format
+//!     (Columns)
+//! ```
+//!
+//! # Basic Example
 //! ```
 //! # use std::sync::Arc;
 //! # use arrow::row::{RowConverter, SortField};
@@ -73,7 +100,9 @@
 //! assert_eq!(&c2_values, &["a", "f", "c", "e"]);
 //! ```
 //!
-//! It can also be used to implement a fast multi-column / lexicographic sort
+//! # Lexsort
+//!
+//! The row format can also be used to implement a fast multi-column / lexicographic sort
 //!
 //! ```
 //! # use arrow::row::{RowConverter, SortField};
@@ -95,6 +124,9 @@
 //! [radix sort]:[https://en.wikipedia.org/wiki/Radix_sort]
 //! [normalized for sorting]:[https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.83.1080&rep=rep1&type=pdf]
 //! [`memcmp`]:[https://www.man7.org/linux/man-pages/man3/memcmp.3.html]
+//! [`lexsort`]: crate::compute::kernels::sort::lexsort
+//! [compared]: PartialOrd
+//! [compare]: PartialOrd
 
 use std::cmp::Ordering;
 use std::hash::{Hash, Hasher};
@@ -119,38 +151,75 @@ mod fixed;
 mod interner;
 mod variable;
 
-/// Converts [`ArrayRef`] columns into a row-oriented format.
+/// Converts [`ArrayRef`] columns into a [row-oriented](self) format.
+///
+/// *Note: The encoding of the row format may change from release to release.*
+///
+/// ## Overview
 ///
-/// # Format
+/// The row format is a variable length byte sequence created by
+/// concatenating the encoded form of each column. The encoding for
+/// each column depends on its datatype (and sort options).
 ///
-/// The encoding of the row format should not be considered stable, but is documented here
-/// for reference.
+/// The encoding is carefully designed in such a way that escaping is
+/// unnecessary: it is never ambiguous as to whether a byte is part of
+/// a sentinel (e.g. null) or a value.
 ///
 /// ## Unsigned Integer Encoding
 ///
 /// A null integer is encoded as a `0_u8`, followed by a zero-ed number of bytes corresponding
-/// to the integer's length
+/// to the integer's length.
 ///
 /// A valid integer is encoded as `1_u8`, followed by the big-endian representation of the
-/// integer
+/// integer.
+///
+/// ```text
+///               ┌──┬──┬──┬──┐      ┌──┬──┬──┬──┬──┐
+///    3          │03│00│00│00│      │01│00│00│00│03│
+///               └──┴──┴──┴──┘      └──┴──┴──┴──┴──┘
+///               ┌──┬──┬──┬──┐      ┌──┬──┬──┬──┬──┐
+///   258         │02│01│00│00│      │01│00│00│01│02│
+///               └──┴──┴──┴──┘      └──┴──┴──┴──┴──┘
+///               ┌──┬──┬──┬──┐      ┌──┬──┬──┬──┬──┐
+///  23423        │7F│5B│00│00│      │01│00│00│5B│7F│
+///               └──┴──┴──┴──┘      └──┴──┴──┴──┴──┘
+///               ┌──┬──┬──┬──┐      ┌──┬──┬──┬──┬──┐
+///  NULL         │??│??│??│??│      │00│00│00│00│00│
+///               └──┴──┴──┴──┘      └──┴──┴──┴──┴──┘
+///
+///              32-bit (4 bytes)        Row Format
+///  Value        Little Endian
+/// ```
 ///
 /// ## Signed Integer Encoding
 ///
 /// Signed integers have their most significant sign bit flipped, and are then encoded in the
-/// same manner as an unsigned integer
+/// same manner as an unsigned integer.
+///
+/// ```text
+///        ┌──┬──┬──┬──┐       ┌──┬──┬──┬──┐       ┌──┬──┬──┬──┬──┐
+///     5  │05│00│00│00│       │05│00│00│80│       │01│80│00│00│05│
+///        └──┴──┴──┴──┘       └──┴──┴──┴──┘       └──┴──┴──┴──┴──┘
+///        ┌──┬──┬──┬──┐       ┌──┬──┬──┬──┐       ┌──┬──┬──┬──┬──┐
+///    -5  │FB│FF│FF│FF│       │FB│FF│FF│7F│       │01│7F│FF│FF│FB│
+///        └──┴──┴──┴──┘       └──┴──┴──┴──┘       └──┴──┴──┴──┴──┘
+///
+///  Value  32-bit (4 bytes)    High bit flipped      Row Format
+///          Little Endian
+/// ```
 ///
 /// ## Float Encoding
 ///
 /// Floats are converted from IEEE 754 representation to a signed integer representation
 /// by flipping all bar the sign bit if they are negative.
 ///
-/// They are then encoded in the same manner as a signed integer
+/// They are then encoded in the same manner as a signed integer.
 ///
-/// ## Variable Length Bytes Encoding
+/// ## Variable Length Bytes (including Strings) Encoding
 ///
-/// A null is encoded as a `0_u8`
+/// A null is encoded as a `0_u8`.
 ///
-/// An empty byte array is encoded as `1_u8`
+/// An empty byte array is encoded as `1_u8`.
 ///
 /// A non-null, non-empty byte array is encoded as `2_u8` followed by the byte array
 /// encoded using a block based scheme described below.
@@ -158,9 +227,38 @@ mod variable;
 /// The byte array is broken up into 32-byte blocks, each block is written in turn
 /// to the output, followed by `0xFF_u8`. The final block is padded to 32-bytes
 /// with `0_u8` and written to the output, followed by the un-padded length in bytes
-/// of this final block as a `u8`
+/// of this final block as a `u8`.
+///
+/// Note the following example encodings use a block size of 4 bytes,
+/// as opposed to 32 bytes for brevity:
+///
+/// ```text
+///                       ┌───┬───┬───┬───┬───┬───┐
+///  "MEEP"               │02 │'M'│'E'│'E'│'P'│04 │
+///                       └───┴───┴───┴───┴───┴───┘
+///
+///                       ┌───┐
+///  ""                   │01 |
+///                       └───┘
+///
+///  NULL                 ┌───┐
+///                       │00 │
+///                       └───┘
+///
+/// "Defenestration"      ┌───┬───┬───┬───┬───┬───┐
+///                       │02 │'D'│'e'│'f'│'e'│FF │
+///                       └───┼───┼───┼───┼───┼───┤
+///                           │'n'│'e'│'s'│'t'│FF │
+///                           ├───┼───┼───┼───┼───┤
+///                           │'r'│'a'│'t'│'r'│FF │
+///                           ├───┼───┼───┼───┼───┤
+///                           │'a'│'t'│'i'│'o'│FF │
+///                           ├───┼───┼───┼───┼───┤
+///                           │'n'│00 │00 │00 │01 │
+///                           └───┴───┴───┴───┴───┘
+/// ```
 ///
-/// This is loosely inspired by [COBS] encoding, and chosen over more traditional
+/// This approach is loosely inspired by [COBS] encoding, and chosen over more traditional
 /// [byte stuffing] as it is more amenable to vectorisation, in particular AVX-256.
 ///
 /// ## Dictionary Encoding
@@ -170,15 +268,48 @@ mod variable;
 /// the dictionary encoding, and encode the array values directly, however, this would lose
 /// the benefits of dictionary encoding to reduce memory and CPU consumption.
 ///
-/// As such the [`RowConverter`] maintains an order-preserving dictionary encoding for each
-/// dictionary encoded column. As this is a variable-length encoding, new dictionary values
-/// can be added whilst preserving the sort order.
+/// As such the [`RowConverter`] creates an order-preserving mapping
+/// for each dictionary encoded column, which allows new dictionary
+/// values to be added whilst preserving the sort order.
 ///
 /// A null dictionary value is encoded as `0_u8`.
 ///
 /// A non-null dictionary value is encoded as `1_u8` followed by a null-terminated byte array
 /// key determined by the order-preserving dictionary encoding
 ///
+/// ```text
+/// ┌──────────┐                 ┌─────┐
+/// │  "Bar"   │ ───────────────▶│ 01  │
+/// └──────────┘                 └─────┘
+/// ┌──────────┐                 ┌─────┬─────┐
+/// │"Fabulous"│ ───────────────▶│ 01  │ 02  │
+/// └──────────┘                 └─────┴─────┘
+/// ┌──────────┐                 ┌─────┐
+/// │  "Soup"  │ ───────────────▶│ 05  │
+/// └──────────┘                 └─────┘
+/// ┌──────────┐                 ┌─────┐
+/// │   "ZZ"   │ ───────────────▶│ 07  │
+/// └──────────┘                 └─────┘
+///
+/// Example Order Preserving Mapping
+/// ```
+/// Using the map above, the corresponding row format will be
+///
+/// ```text
+///                           ┌─────┬─────┬─────┬─────┐
+///    "Fabulous"             │ 01  │ 03  │ 05  │ 00  │
+///                           └─────┴─────┴─────┴─────┘
+///
+///                           ┌─────┬─────┬─────┐
+///    "ZZ"                   │ 01  │ 07  │ 00  │
+///                           └─────┴─────┴─────┘
+///
+///                           ┌─────┐
+///     NULL                  │ 00  │
+///                           └─────┘
+///
+///      Input                  Row Format
+/// ```
 /// # Ordering
 ///
 /// ## Float Ordering
@@ -199,8 +330,8 @@ mod variable;
 ///
 /// The order of a given column can be reversed by negating the encoded bytes of non-null values
 ///
-/// [COBS]:[https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing]
-/// [byte stuffing]:[https://en.wikipedia.org/wiki/High-Level_Data_Link_Control#Asynchronous_framing]
+/// [COBS]: https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing
+/// [byte stuffing]: https://en.wikipedia.org/wiki/High-Level_Data_Link_Control#Asynchronous_framing
 #[derive(Debug)]
 pub struct RowConverter {
     fields: Arc<[SortField]>,
@@ -351,9 +482,9 @@ impl RowConverter {
     }
 }
 
-/// A row-oriented representation of arrow data, that is normalized for comparison
+/// A row-oriented representation of arrow data, that is normalized for comparison.
 ///
-/// See [`RowConverter`]
+/// See the [module level documentation](self) and [`RowConverter`] for more details.
 #[derive(Debug)]
 pub struct Rows {
     /// Underlying row bytes
@@ -439,12 +570,14 @@ impl<'a> DoubleEndedIterator for RowsIter<'a> {
     }
 }
 
-/// A comparable representation of a row
+/// A comparable representation of a row.
 ///
-/// Two [`Row`] can be compared if they both belong to [`Rows`] returned by calls to
-/// [`RowConverter::convert_columns`] on the same [`RowConverter`]
+/// See the [module level documentation](self) for more details.
 ///
-/// Otherwise any ordering established by comparing the [`Row`] is arbitrary
+/// Two [`Row`] can only be compared if they both belong to [`Rows`]
+/// returned by calls to [`RowConverter::convert_columns`] on the same
+/// [`RowConverter`]. If different [`RowConverter`]s are used, any
+/// ordering established by comparing the [`Row`] is arbitrary.
 #[derive(Debug, Copy, Clone)]
 pub struct Row<'a> {
     data: &'a [u8],