Posted to commits@arrow.apache.org by al...@apache.org on 2021/01/09 11:18:09 UTC

[arrow] branch master updated: ARROW-11168: [Rust] [Doc] Fix cargo doc warnings

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 08cccd6  ARROW-11168: [Rust] [Doc] Fix cargo doc warnings
08cccd6 is described below

commit 08cccd68802c9ddc3ca0a5d4bad6e4ba382d74b4
Author: mqy <me...@gmail.com>
AuthorDate: Sat Jan 9 06:17:08 2021 -0500

    ARROW-11168: [Rust] [Doc] Fix cargo doc warnings
    
    The PR fixed cargo doc warnings (including those in the generated flatbuffer files) and corrected several typos.
    To catch all possible cargo doc warnings, it looks like we have to run `cargo doc` with both the nightly and stable toolchains.
    
    I have filed https://issues.apache.org/jira/browse/ARROW-11179 to address the cargo doc problem with the generated flatbuffer files; hopefully it can be resolved soon. For now, the fix is kept as `format-0ed34c83.patch`, which regen.sh applies to the flatbuffer definitions before code generation.
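    
    Most of the doc fixes follow the same three rustdoc patterns: backticks around bracketed index
    expressions, angle brackets around bare URLs, and ```text fences around ASCII diagrams. A minimal
    sketch of the pattern (the struct and field names are illustrative, not taken from the patch):
    
        /// The i-th row spans from `indptr[i]` to `indptr[i + 1]`; the backticks
        /// keep rustdoc from reading the square brackets as an intra-doc link.
        ///
        /// See <https://arrow.apache.org/docs/format/Columnar.html>; the angle
        /// brackets turn the bare URL into a proper autolink.
        ///
        /// ```text
        ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10]
        /// ```
        /// The `text` fence stops rustdoc from compiling the block as a doctest.
        pub struct IndptrBuffer {
            /// Row offsets into the values buffer (illustrative field).
            pub offsets: Vec<i64>,
        }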
    
    Closes #9133 from mqy/ARROW-11168_fix_cargo_warnings
    
    Authored-by: mqy <me...@gmail.com>
    Signed-off-by: Andrew Lamb <an...@nerdnetworks.org>
---
 rust/arrow/format-0ed34c83.patch             | 220 +++++++++++++++++++++++++++
 rust/arrow/regen.sh                          |  26 ++--
 rust/arrow/src/array/array_primitive.rs      |   2 +-
 rust/arrow/src/array/equal/structure.rs      |   2 +-
 rust/arrow/src/datatypes.rs                  |   2 +-
 rust/arrow/src/ffi.rs                        |  10 +-
 rust/arrow/src/ipc/gen/Message.rs            |   2 +-
 rust/arrow/src/ipc/gen/Schema.rs             |   5 +-
 rust/arrow/src/ipc/gen/SparseTensor.rs       |  50 +++---
 rust/arrow/src/lib.rs                        |   6 +-
 rust/datafusion/src/error.rs                 |   4 +-
 rust/datafusion/src/logical_plan/plan.rs     |  10 +-
 rust/datafusion/src/physical_plan/mod.rs     |   2 +-
 rust/datafusion/src/physical_plan/parquet.rs |   4 +-
 rust/parquet/Cargo.toml                      |   2 +
 rust/parquet/src/arrow/levels.rs             |   8 +-
 rust/parquet/src/basic.rs                    |   2 +-
 rust/parquet/src/column/page.rs              |   2 +-
 rust/parquet/src/encodings/rle.rs            |   2 +-
 rust/parquet/src/record/reader.rs            |  18 +--
 20 files changed, 299 insertions(+), 80 deletions(-)

diff --git a/rust/arrow/format-0ed34c83.patch b/rust/arrow/format-0ed34c83.patch
new file mode 100644
index 0000000..5da0a0c
--- /dev/null
+++ b/rust/arrow/format-0ed34c83.patch
@@ -0,0 +1,220 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+diff --git a/format/Message.fbs b/format/Message.fbs
+index 1a7e0dfff..f1c18d765 100644
+--- a/format/Message.fbs
++++ b/format/Message.fbs
+@@ -28,7 +28,7 @@ namespace org.apache.arrow.flatbuf;
+ /// Metadata about a field at some level of a nested type tree (but not
+ /// its children).
+ ///
+-/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
++/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
+ /// would have {length: 5, null_count: 2} for its List node, and {length: 6,
+ /// null_count: 0} for its Int16 node, as separate FieldNode structs
+ struct FieldNode {
+diff --git a/format/Schema.fbs b/format/Schema.fbs
+index 3b37e5d85..3b00dd478 100644
+--- a/format/Schema.fbs
++++ b/format/Schema.fbs
+@@ -110,10 +110,11 @@ table FixedSizeList {
+ /// not enforced.
+ ///
+ /// Map
++/// ```text
+ ///   - child[0] entries: Struct
+ ///     - child[0] key: K
+ ///     - child[1] value: V
+-///
++/// ```
+ /// Neither the "entries" field nor the "key" field may be nullable.
+ ///
+ /// The metadata is structured so that Arrow systems without special handling
+@@ -129,7 +130,7 @@ enum UnionMode:short { Sparse, Dense }
+ /// A union is a complex type with children in Field
+ /// By default ids in the type vector refer to the offsets in the children
+ /// optionally typeIds provides an indirection between the child offset and the type id
+-/// for each child typeIds[offset] is the id used in the type vector
++/// for each child `typeIds[offset]` is the id used in the type vector
+ table Union {
+   mode: UnionMode;
+   typeIds: [ int ]; // optional, describes typeid of each child.
+diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
+index 3fe8a7582..a6fd2f9e7 100644
+--- a/format/SparseTensor.fbs
++++ b/format/SparseTensor.fbs
+@@ -37,21 +37,21 @@ namespace org.apache.arrow.flatbuf;
+ ///
+ /// For example, let X be a 2x3x4x5 tensor, and it has the following
+ /// 6 non-zero values:
+-///
++/// ```text
+ ///   X[0, 1, 2, 0] := 1
+ ///   X[1, 1, 2, 3] := 2
+ ///   X[0, 2, 1, 0] := 3
+ ///   X[0, 1, 3, 0] := 4
+ ///   X[0, 1, 2, 1] := 5
+ ///   X[1, 2, 0, 4] := 6
+-///
++/// ```
+ /// In COO format, the index matrix of X is the following 4x6 matrix:
+-///
++/// ```text
+ ///   [[0, 0, 0, 0, 1, 1],
+ ///    [1, 1, 1, 2, 1, 2],
+ ///    [2, 2, 3, 1, 2, 0],
+ ///    [0, 1, 0, 0, 3, 4]]
+-///
++/// ```
+ /// When isCanonical is true, the indices is sorted in lexicographical order
+ /// (row-major order), and it does not have duplicated entries.  Otherwise,
+ /// the indices may not be sorted, or may have duplicated entries.
+@@ -86,26 +86,27 @@ table SparseMatrixIndexCSX {
+ 
+   /// indptrBuffer stores the location and size of indptr array that
+   /// represents the range of the rows.
+-  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
++  /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+   /// The length of this array is 1 + (the number of rows), and the type
+   /// of index value is long.
+   ///
+   /// For example, let X be the following 6x4 matrix:
+-  ///
++  /// ```text
+   ///   X := [[0, 1, 2, 0],
+   ///         [0, 0, 3, 0],
+   ///         [0, 4, 0, 5],
+   ///         [0, 0, 0, 0],
+   ///         [6, 0, 7, 8],
+   ///         [0, 9, 0, 0]].
+-  ///
++  /// ```
+   /// The array of non-zero values in X is:
+-  ///
++  /// ```text
+   ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+-  ///
++  /// ```
+   /// And the indptr of X is:
+-  ///
++  /// ```text
+   ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
++  /// ```
+   indptrBuffer: Buffer (required);
+ 
+   /// The type of values in indicesBuffer
+@@ -116,9 +117,9 @@ table SparseMatrixIndexCSX {
+   /// The type of index value is long.
+   ///
+   /// For example, the indices of the above X is:
+-  ///
++  /// ```text
+   ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+-  ///
++  /// ```
+   /// Note that the indices are sorted in lexicographical order for each row.
+   indicesBuffer: Buffer (required);
+ }
+@@ -126,7 +127,7 @@ table SparseMatrixIndexCSX {
+ /// Compressed Sparse Fiber (CSF) sparse tensor index.
+ table SparseTensorIndexCSF {
+   /// CSF is a generalization of compressed sparse row (CSR) index.
+-  /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
++  /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+   ///
+   /// CSF index recursively compresses each dimension of a tensor into a set
+   /// of prefix trees. Each path from a root to leaf forms one tensor
+@@ -135,7 +136,7 @@ table SparseTensorIndexCSF {
+   ///
+   /// For example, let X be a 2x3x4x5 tensor and let it have the following
+   /// 8 non-zero values:
+-  ///
++  /// ```text
+   ///   X[0, 0, 0, 1] := 1
+   ///   X[0, 0, 0, 2] := 2
+   ///   X[0, 1, 0, 0] := 3
+@@ -144,9 +145,9 @@ table SparseTensorIndexCSF {
+   ///   X[1, 1, 1, 0] := 6
+   ///   X[1, 1, 1, 1] := 7
+   ///   X[1, 1, 1, 2] := 8
+-  ///
++  /// ```
+   /// As a prefix tree this would be represented as:
+-  ///
++  /// ```text
+   ///         0          1
+   ///        / \         |
+   ///       0   1        1
+@@ -154,24 +155,24 @@ table SparseTensorIndexCSF {
+   ///     0   0   1      1
+   ///    /|  /|   |    /| |
+   ///   1 2 0 2   0   0 1 2
+-
++  /// ```
+   /// The type of values in indptrBuffers
+   indptrType: Int (required);
+ 
+   /// indptrBuffers stores the sparsity structure.
+   /// Each two consecutive dimensions in a tensor correspond to a buffer in
+-  /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
+-  /// and indptrBuffers[dim][i + 1] signify a range of nodes in
+-  /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
++  /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
++  /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
++  /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
+   ///
+   /// For example, the indptrBuffers for the above X is:
+-  ///
++  /// ```text
+   ///   indptrBuffer(X) = [
+   ///                       [0, 2, 3],
+   ///                       [0, 1, 3, 4],
+   ///                       [0, 2, 4, 5, 8]
+   ///                     ].
+-  ///
++  /// ```
+   indptrBuffers: [Buffer] (required);
+ 
+   /// The type of values in indicesBuffers
+@@ -180,22 +181,22 @@ table SparseTensorIndexCSF {
+   /// indicesBuffers stores values of nodes.
+   /// Each tensor dimension corresponds to a buffer in indicesBuffers.
+   /// For example, the indicesBuffers for the above X is:
+-  ///
++  /// ```text
+   ///   indicesBuffer(X) = [
+   ///                        [0, 1],
+   ///                        [0, 1, 1],
+   ///                        [0, 0, 1, 1],
+   ///                        [1, 2, 0, 2, 0, 0, 1, 2]
+   ///                      ].
+-  ///
++  /// ```
+   indicesBuffers: [Buffer] (required);
+ 
+   /// axisOrder stores the sequence in which dimensions were traversed to
+   /// produce the prefix tree.
+   /// For example, the axisOrder for the above X is:
+-  ///
++  /// ```text
+   ///   axisOrder(X) = [0, 1, 2, 3].
+-  ///
++  /// ```
+   axisOrder: [int] (required);
+ }
+
diff --git a/rust/arrow/regen.sh b/rust/arrow/regen.sh
index 4bc35a4..e96f11e 100755
--- a/rust/arrow/regen.sh
+++ b/rust/arrow/regen.sh
@@ -54,9 +54,17 @@ echo "run: bazel build :flatc ..."
 bazel build :flatc
 popd
 
+FB_PATCH="rust/arrow/format-0ed34c83.patch"
+echo "Patch flatbuffer files with ${FB_PATCH} for cargo doc"
+echo "NOTE: the patch MAY need update in case of changes in format/*.fbs"
+git apply --check ${FB_PATCH} && git apply ${FB_PATCH}
+
 # Execute the code generation:
 $FLATC --filename-suffix "" --rust -o rust/arrow/src/ipc/gen/ format/*.fbs
 
+# Reset changes to format/
+git checkout -- format
+
 # Now the files are wrongly named so we have to change that.
 popd
 pushd $DIR/src/ipc/gen
@@ -97,7 +105,6 @@ names=("File" "Message" "Schema" "SparseTensor" "Tensor")
 
 # Remove all generated lines we don't need
 for f in `ls *.rs`; do
-
     if [[ $f == "mod.rs" ]]; then
         continue
     fi
@@ -147,17 +154,6 @@ done
 popd
 cargo +stable fmt -- src/ipc/gen/*
 
-echo "=== TIPS ==="
-echo "Let's manually fix rustdoc of SparseTensorIndexCSF::indptrType:"
-echo 'prepend the tree with ```text, and append the tree with ```'
-cat <<TREE_EOF
-    /// \`\`\`text
-    ///         0          1
-    ///        / \         |
-    ///       0   1        1
-    ///      /   / \       |
-    ///     0   0   1      1
-    ///    /|  /|   |    /| |
-    ///   1 2 0 2   0   0 1 2
-    /// \`\`\`
-TREE_EOF
\ No newline at end of file
+echo "DONE!"
+echo "Please run 'cargo doc' and 'cargo test' with nightly and stable, "
+echo "and fix possible errors or warnings!"
diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs
index 0bdc3e5..febb165 100644
--- a/rust/arrow/src/array/array_primitive.rs
+++ b/rust/arrow/src/array/array_primitive.rs
@@ -49,7 +49,7 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
     /// Pointer to the value array. The lifetime of this must be <= to the value buffer
     /// stored in `data`, so it's safe to store.
     /// # Safety
-    /// raw_values must have a value equivalent to data.buffers()[0].raw_data()
+    /// raw_values must have a value equivalent to `data.buffers()[0].raw_data()`
     /// raw_values must have alignment for type T::NativeType
     raw_values: RawPtrBox<T::Native>,
 }
diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs
index 8779a16..6ec7183 100644
--- a/rust/arrow/src/array/equal/structure.rs
+++ b/rust/arrow/src/array/equal/structure.rs
@@ -27,7 +27,7 @@ use super::{equal_range, utils::child_logical_null_buffer};
 /// If an array is a child of a struct or list, the array's nulls have to be merged with the parent.
 /// This then affects the null count of the array, thus the merged nulls are passed separately
 /// as `lhs_nulls` and `rhs_nulls` variables to functions.
-/// The nulls are merged with a bitwise AND, and null counts are recomputed wheer necessary.
+/// The nulls are merged with a bitwise AND, and null counts are recomputed where necessary.
 fn equal_values(
     lhs: &ArrayData,
     rhs: &ArrayData,
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 7b16d95..8c03a75 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -487,7 +487,7 @@ make_type!(
 );
 
 /// A subtype of primitive type that represents legal dictionary keys.
-/// See https://arrow.apache.org/docs/format/Columnar.html
+/// See <https://arrow.apache.org/docs/format/Columnar.html>
 pub trait ArrowDictionaryKeyType: ArrowPrimitiveType {}
 
 impl ArrowDictionaryKeyType for Int8Type {}
diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs
index 79638b9..c3b0509 100644
--- a/rust/arrow/src/ffi.rs
+++ b/rust/arrow/src/ffi.rs
@@ -21,8 +21,8 @@
 //! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust.
 //! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
 //!
-//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to [Datatype],
-//! `Buffer`, etc. This is handled by [ArrowArray].
+//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`,
+//! `Buffer`, etc. This is handled by `ArrowArray`.
 //!
 //! ```rust
 //! # use std::sync::Arc;
@@ -91,7 +91,7 @@ use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
 
 /// ABI-compatible struct for `ArrowSchema` from C Data Interface
-/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
+/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
 /// This was created by bindgen
 #[repr(C)]
 #[derive(Debug)]
@@ -120,7 +120,7 @@ unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
 impl FFI_ArrowSchema {
     /// create a new [FFI_ArrowSchema] from a format.
     fn new(format: &str) -> FFI_ArrowSchema {
-        // https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema
+        // <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
         FFI_ArrowSchema {
             format: CString::new(format).unwrap().into_raw(),
             name: std::ptr::null_mut(),
@@ -303,7 +303,7 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
 }
 
 /// ABI-compatible struct for ArrowArray from C Data Interface
-/// See https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions
+/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
 /// This was created by bindgen
 #[repr(C)]
 #[derive(Debug)]
diff --git a/rust/arrow/src/ipc/gen/Message.rs b/rust/arrow/src/ipc/gen/Message.rs
index 0d05a49..79a9df3 100644
--- a/rust/arrow/src/ipc/gen/Message.rs
+++ b/rust/arrow/src/ipc/gen/Message.rs
@@ -336,7 +336,7 @@ impl flatbuffers::SimpleToVerifyInSlice for MessageHeader {}
 /// Metadata about a field at some level of a nested type tree (but not
 /// its children).
 ///
-/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+/// For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
 /// would have {length: 5, null_count: 2} for its List node, and {length: 6,
 /// null_count: 0} for its Int16 node, as separate FieldNode structs
 // struct FieldNode, aligned to 8
diff --git a/rust/arrow/src/ipc/gen/Schema.rs b/rust/arrow/src/ipc/gen/Schema.rs
index 55bbc33..61a9574 100644
--- a/rust/arrow/src/ipc/gen/Schema.rs
+++ b/rust/arrow/src/ipc/gen/Schema.rs
@@ -1594,10 +1594,11 @@ pub enum MapOffset {}
 /// not enforced.
 ///
 /// Map
+/// ```text
 ///   - child[0] entries: Struct
 ///     - child[0] key: K
 ///     - child[1] value: V
-///
+/// ```
 /// Neither the "entries" field nor the "key" field may be nullable.
 ///
 /// The metadata is structured so that Arrow systems without special handling
@@ -1703,7 +1704,7 @@ pub enum UnionOffset {}
 /// A union is a complex type with children in Field
 /// By default ids in the type vector refer to the offsets in the children
 /// optionally typeIds provides an indirection between the child offset and the type id
-/// for each child typeIds[offset] is the id used in the type vector
+/// for each child `typeIds[offset]` is the id used in the type vector
 pub struct Union<'a> {
     pub _tab: flatbuffers::Table<'a>,
 }
diff --git a/rust/arrow/src/ipc/gen/SparseTensor.rs b/rust/arrow/src/ipc/gen/SparseTensor.rs
index 1b45a82..04a2339 100644
--- a/rust/arrow/src/ipc/gen/SparseTensor.rs
+++ b/rust/arrow/src/ipc/gen/SparseTensor.rs
@@ -235,21 +235,21 @@ pub enum SparseTensorIndexCOOOffset {}
 ///
 /// For example, let X be a 2x3x4x5 tensor, and it has the following
 /// 6 non-zero values:
-///
+/// ```text
 ///   X[0, 1, 2, 0] := 1
 ///   X[1, 1, 2, 3] := 2
 ///   X[0, 2, 1, 0] := 3
 ///   X[0, 1, 3, 0] := 4
 ///   X[0, 1, 2, 1] := 5
 ///   X[1, 2, 0, 4] := 6
-///
+/// ```
 /// In COO format, the index matrix of X is the following 4x6 matrix:
-///
+/// ```text
 ///   [[0, 0, 0, 0, 1, 1],
 ///    [1, 1, 1, 2, 1, 2],
 ///    [2, 2, 3, 1, 2, 0],
 ///    [0, 1, 0, 0, 3, 4]]
-///
+/// ```
 /// When isCanonical is true, the indices is sorted in lexicographical order
 /// (row-major order), and it does not have duplicated entries.  Otherwise,
 /// the indices may not be sorted, or may have duplicated entries.
@@ -518,26 +518,27 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     }
     /// indptrBuffer stores the location and size of indptr array that
     /// represents the range of the rows.
-    /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+    /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
     /// The length of this array is 1 + (the number of rows), and the type
     /// of index value is long.
     ///
     /// For example, let X be the following 6x4 matrix:
-    ///
+    /// ```text
     ///   X := [[0, 1, 2, 0],
     ///         [0, 0, 3, 0],
     ///         [0, 4, 0, 5],
     ///         [0, 0, 0, 0],
     ///         [6, 0, 7, 8],
     ///         [0, 9, 0, 0]].
-    ///
+    /// ```
     /// The array of non-zero values in X is:
-    ///
+    /// ```text
     ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-    ///
+    /// ```
     /// And the indptr of X is:
-    ///
+    /// ```text
     ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
+    /// ```
     #[inline]
     pub fn indptrBuffer(&self) -> &'a Buffer {
         self._tab
@@ -559,9 +560,9 @@ impl<'a> SparseMatrixIndexCSX<'a> {
     /// The type of index value is long.
     ///
     /// For example, the indices of the above X is:
-    ///
+    /// ```text
     ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-    ///
+    /// ```
     /// Note that the indices are sorted in lexicographical order for each row.
     #[inline]
     pub fn indicesBuffer(&self) -> &'a Buffer {
@@ -750,7 +751,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     pub const VT_AXISORDER: flatbuffers::VOffsetT = 12;
 
     /// CSF is a generalization of compressed sparse row (CSR) index.
-    /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
+    /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
     ///
     /// CSF index recursively compresses each dimension of a tensor into a set
     /// of prefix trees. Each path from a root to leaf forms one tensor
@@ -759,7 +760,7 @@ impl<'a> SparseTensorIndexCSF<'a> {
     ///
     /// For example, let X be a 2x3x4x5 tensor and let it have the following
     /// 8 non-zero values:
-    ///
+    /// ```text
     ///   X[0, 0, 0, 1] := 1
     ///   X[0, 0, 0, 2] := 2
     ///   X[0, 1, 0, 0] := 3
@@ -768,9 +769,8 @@ impl<'a> SparseTensorIndexCSF<'a> {
     ///   X[1, 1, 1, 0] := 6
     ///   X[1, 1, 1, 1] := 7
     ///   X[1, 1, 1, 2] := 8
-    ///
+    /// ```
     /// As a prefix tree this would be represented as:
-    ///
     /// ```text
     ///         0          1
     ///        / \         |
@@ -792,18 +792,18 @@ impl<'a> SparseTensorIndexCSF<'a> {
     }
     /// indptrBuffers stores the sparsity structure.
     /// Each two consecutive dimensions in a tensor correspond to a buffer in
-    /// indptrBuffers. A pair of consecutive values at indptrBuffers[dim][i]
-    /// and indptrBuffers[dim][i + 1] signify a range of nodes in
-    /// indicesBuffers[dim + 1] who are children of indicesBuffers[dim][i] node.
+    /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+    /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+    /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
     ///
     /// For example, the indptrBuffers for the above X is:
-    ///
+    /// ```text
     ///   indptrBuffer(X) = [
     ///                       [0, 2, 3],
     ///                       [0, 1, 3, 4],
     ///                       [0, 2, 4, 5, 8]
     ///                     ].
-    ///
+    /// ```
     #[inline]
     pub fn indptrBuffers(&self) -> &'a [Buffer] {
         self._tab
@@ -827,14 +827,14 @@ impl<'a> SparseTensorIndexCSF<'a> {
     /// indicesBuffers stores values of nodes.
     /// Each tensor dimension corresponds to a buffer in indicesBuffers.
     /// For example, the indicesBuffers for the above X is:
-    ///
+    /// ```text
     ///   indicesBuffer(X) = [
     ///                        [0, 1],
     ///                        [0, 1, 1],
     ///                        [0, 0, 1, 1],
     ///                        [1, 2, 0, 2, 0, 0, 1, 2]
     ///                      ].
-    ///
+    /// ```
     #[inline]
     pub fn indicesBuffers(&self) -> &'a [Buffer] {
         self._tab
@@ -848,9 +848,9 @@ impl<'a> SparseTensorIndexCSF<'a> {
     /// axisOrder stores the sequence in which dimensions were traversed to
     /// produce the prefix tree.
     /// For example, the axisOrder for the above X is:
-    ///
+    /// ```text
     ///   axisOrder(X) = [0, 1, 2, 3].
-    ///
+    /// ```
     #[inline]
     pub fn axisOrder(&self) -> flatbuffers::Vector<'a, i32> {
         self._tab
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
index 9c91d38..1fa3cdd 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/lib.rs
@@ -70,8 +70,8 @@
 //!
 //! ## Memory and Buffers
 //!
-//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::data::ArrayData), that in turn
-//! is a collection of other [`ArrayData`](array::data::ArrayData) and [`Buffers`](buffer::Buffer).
+//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::ArrayData), that in turn
+//! is a collection of other [`ArrayData`](array::ArrayData) and [`Buffers`](buffer::Buffer).
 //! [`Buffers`](buffer::Buffer) is the central struct that array implementations use keep allocated memory and pointers.
 //! The [`MutableBuffer`](buffer::MutableBuffer) is the mutable counter-part of[`Buffer`](buffer::Buffer).
 //! These are the lowest abstractions of this crate, and are used throughout the crate to
@@ -90,7 +90,7 @@
 //! ## Compute
 //!
 //! This crate offers many operations (called kernels) to operate on `Array`s, that you can find at [compute::kernels].
-//! It has both vertial and horizontal operations, and some of them have an SIMD implementation.
+//! It has both vertical and horizontal operations, and some of them have an SIMD implementation.
 //!
 //! ## Status
 //!
diff --git a/rust/datafusion/src/error.rs b/rust/datafusion/src/error.rs
index b4c8dcc..903faea 100644
--- a/rust/datafusion/src/error.rs
+++ b/rust/datafusion/src/error.rs
@@ -39,7 +39,7 @@ pub enum DataFusionError {
     ParquetError(ParquetError),
     /// Error associated to I/O operations and associated traits.
     IoError(io::Error),
-    /// Error returned when SQL is syntatically incorrect.
+    /// Error returned when SQL is syntactically incorrect.
     SQL(ParserError),
     /// Error returned on a branch that we know it is possible
     /// but to which we still have no implementation for.
@@ -59,7 +59,7 @@ pub enum DataFusionError {
 }
 
 impl DataFusionError {
-    /// Wraps this [DataFusionError] as an [Arrow::error::ArrowError].
+    /// Wraps this [DataFusionError] as an [arrow::error::ArrowError].
     pub fn into_arrow_external_error(self) -> ArrowError {
         ArrowError::from_external_error(Box::new(self))
     }
diff --git a/rust/datafusion/src/logical_plan/plan.rs b/rust/datafusion/src/logical_plan/plan.rs
index f120548..8002d16 100644
--- a/rust/datafusion/src/logical_plan/plan.rs
+++ b/rust/datafusion/src/logical_plan/plan.rs
@@ -213,7 +213,7 @@ pub enum Partitioning {
     RoundRobinBatch(usize),
     /// Allocate rows based on a hash of one of more expressions and the specified number
     /// of partitions.
-    /// This partitioning scheme is not yet fully supported. See https://issues.apache.org/jira/browse/ARROW-11011
+    /// This partitioning scheme is not yet fully supported. See <https://issues.apache.org/jira/browse/ARROW-11011>
     Hash(Vec<Expr>, usize),
 }
 
@@ -248,7 +248,7 @@ pub trait PlanVisitor {
     /// Invoked on a logical plan before any of its child inputs have been
     /// visited. If Ok(true) is returned, the recursion continues. If
     /// Err(..) or Ok(false) are returned, the recursion stops
-    /// immedately and the error, if any, is returned to `accept`
+    /// immediately and the error, if any, is returned to `accept`
     fn pre_visit(&mut self, plan: &LogicalPlan)
         -> std::result::Result<bool, Self::Error>;
 
@@ -835,9 +835,9 @@ mod tests {
         }
     }
 
-    /// test earliy stopping in pre-visit
+    /// test early stopping in pre-visit
     #[test]
-    fn early_stoping_pre_visit() {
+    fn early_stopping_pre_visit() {
         let mut visitor = StoppingVisitor {
             return_false_from_pre_in: OptionalCounter::new(2),
             ..Default::default()
@@ -853,7 +853,7 @@ mod tests {
     }
 
     #[test]
-    fn early_stoping_post_visit() {
+    fn early_stopping_post_visit() {
         let mut visitor = StoppingVisitor {
             return_false_from_post_in: OptionalCounter::new(1),
             ..Default::default()
diff --git a/rust/datafusion/src/physical_plan/mod.rs b/rust/datafusion/src/physical_plan/mod.rs
index 605e5d6..f2b984b 100644
--- a/rust/datafusion/src/physical_plan/mod.rs
+++ b/rust/datafusion/src/physical_plan/mod.rs
@@ -131,7 +131,7 @@ pub enum Partitioning {
     RoundRobinBatch(usize),
     /// Allocate rows based on a hash of one of more expressions and the specified
     /// number of partitions
-    /// This partitioning scheme is not yet fully supported. See https://issues.apache.org/jira/browse/ARROW-11011
+    /// This partitioning scheme is not yet fully supported. See [ARROW-11011](https://issues.apache.org/jira/browse/ARROW-11011)
     Hash(Vec<Arc<dyn PhysicalExpr>>, usize),
     /// Unknown partitioning scheme with a known number of partitions
     UnknownPartitioning(usize),
diff --git a/rust/datafusion/src/physical_plan/parquet.rs b/rust/datafusion/src/physical_plan/parquet.rs
index 53b2667..9a03afd 100644
--- a/rust/datafusion/src/physical_plan/parquet.rs
+++ b/rust/datafusion/src/physical_plan/parquet.rs
@@ -60,11 +60,11 @@ pub struct ParquetExec {
 ///
 /// In the future it would be good to support subsets of files based on ranges of row groups
 /// so that we can better parallelize reads of large files across available cores (see
-/// https://issues.apache.org/jira/browse/ARROW-10995).
+/// [ARROW-10995](https://issues.apache.org/jira/browse/ARROW-10995)).
 ///
 /// We may also want to support reading Parquet files that are partitioned based on a key and
 /// in this case we would want this partition struct to represent multiple files for a given
-/// partition key (see https://issues.apache.org/jira/browse/ARROW-11019).
+/// partition key (see [ARROW-11019](https://issues.apache.org/jira/browse/ARROW-11019)).
 #[derive(Debug, Clone)]
 pub struct ParquetPartition {
     /// The Parquet filename for this partition
diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml
index 72b0e9d..6529a85 100644
--- a/rust/parquet/Cargo.toml
+++ b/rust/parquet/Cargo.toml
@@ -29,6 +29,8 @@ build = "build.rs"
 edition = "2018"
 
 [dependencies]
+# update note: pin `parquet-format` to specific version until it does not break at minor
+# version, see ARROW-11187.
 parquet-format = "~2.6.1"
 byteorder = "1"
 thrift = "0.13"
diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs
index 1c178e3..846ceab 100644
--- a/rust/parquet/src/arrow/levels.rs
+++ b/rust/parquet/src/arrow/levels.rs
@@ -20,12 +20,12 @@
 //! Contains the algorithm for computing definition and repetition levels.
 //! The algorithm works by tracking the slots of an array that should ultimately be populated when
 //! writing to Parquet.
-//! Parquet achieves nesting through definition levels and repetition levels [1].
+//! Parquet achieves nesting through definition levels and repetition levels \[1\].
 //! Definition levels specify how many optional fields in the part for the column are defined.
 //! Repetition levels specify at what repeated field (list) in the path a column is defined.
 //!
 //! In a nested data structure such as `a.b.c`, one can see levels as defining whether a record is
-//! defined at `a`, `a.b`, or `a.b.c`. Optional fields are nullable fields, thus if all 3 fiedls
+//! defined at `a`, `a.b`, or `a.b.c`. Optional fields are nullable fields, thus if all 3 fields
 //! are nullable, the maximum definition will be = 3.
 //!
 //! The algorithm in this module computes the necessary information to enable the writer to keep
@@ -37,13 +37,13 @@
 //! We use an eager approach that increments definition levels where incrementable, and decrements
 //! if a value being checked is null.
 //!
-//! [1] https://github.com/apache/parquet-format#nested-encoding
+//! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding)
 
 use arrow::array::{Array, ArrayRef, StructArray};
 use arrow::datatypes::{DataType, Field};
 use arrow::record_batch::RecordBatch;
 
-/// Keeps track of the level information per array that is needed to write an Arrow aray to Parquet.
+/// Keeps track of the level information per array that is needed to write an Arrow array to Parquet.
 ///
 /// When a nested schema is traversed, intermediate [LevelInfo] structs are created to track
 /// the state of parent arrays. When a primitive Arrow array is encountered, a final [LevelInfo]
diff --git a/rust/parquet/src/basic.rs b/rust/parquet/src/basic.rs
index 0cea043..bf41d43 100644
--- a/rust/parquet/src/basic.rs
+++ b/rust/parquet/src/basic.rs
@@ -256,7 +256,7 @@ pub enum PageType {
 /// min/max.
 ///
 /// See reference in
-/// https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h
+/// <https://github.com/apache/parquet-cpp/blob/master/src/parquet/types.h>
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum SortOrder {
     /// Signed (either value or legacy byte-wise) comparison.
diff --git a/rust/parquet/src/column/page.rs b/rust/parquet/src/column/page.rs
index 43c0c4a..0573616 100644
--- a/rust/parquet/src/column/page.rs
+++ b/rust/parquet/src/column/page.rs
@@ -93,7 +93,7 @@ impl Page {
         }
     }
 
-    /// Returns optional [`Statistics`](crate::file::metadata::Statistics).
+    /// Returns optional [`Statistics`](crate::file::statistics::Statistics).
     pub fn statistics(&self) -> Option<&Statistics> {
         match self {
             Page::DataPage { ref statistics, .. } => statistics.as_ref(),
diff --git a/rust/parquet/src/encodings/rle.rs b/rust/parquet/src/encodings/rle.rs
index 5a52201..d8cd50d 100644
--- a/rust/parquet/src/encodings/rle.rs
+++ b/rust/parquet/src/encodings/rle.rs
@@ -25,7 +25,7 @@ use crate::util::{
 
 /// Rle/Bit-Packing Hybrid Encoding
 /// The grammar for this encoding looks like the following (copied verbatim
-/// from https://github.com/Parquet/parquet-format/blob/master/Encodings.md):
+/// from <https://github.com/Parquet/parquet-format/blob/master/Encodings.md>):
 ///
 /// rle-bit-packed-hybrid: <length> <encoded-data>
 /// length := length of the <encoded-data> in bytes stored as 4 bytes little endian
diff --git a/rust/parquet/src/record/reader.rs b/rust/parquet/src/record/reader.rs
index a6f5e29..0b02bc8 100644
--- a/rust/parquet/src/record/reader.rs
+++ b/rust/parquet/src/record/reader.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 //! Contains implementation of record assembly and converting Parquet types into
-//! [`Row`](crate::record::api::Row)s.
+//! [`Row`](crate::record::Row)s.
 
 use std::{collections::HashMap, fmt, sync::Arc};
 
@@ -346,7 +346,7 @@ impl Reader {
     /// Returns true if repeated type is an element type for the list.
     /// Used to determine legacy list types.
     /// This method is copied from Spark Parquet reader and is based on the reference:
-    /// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+    /// <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md>
     ///   #backward-compatibility-rules
     fn is_element_type(repeated_type: &Type) -> bool {
         // For legacy 2-level list types with primitive element type, e.g.:
@@ -628,7 +628,7 @@ impl<'a> Either<'a> {
     }
 }
 
-/// Iterator of [`Row`](crate::record::api::Row)s.
+/// Iterator of [`Row`](crate::record::Row)s.
 /// It is used either for a single row group to iterate over data in that row group, or
 /// an entire file with auto buffering of all row groups.
 pub struct RowIter<'a> {
@@ -641,7 +641,7 @@ pub struct RowIter<'a> {
 }
 
 impl<'a> RowIter<'a> {
-    /// Creates a new iterator of [`Row`](crate::record::api::Row)s.
+    /// Creates a new iterator of [`Row`](crate::record::Row)s.
     fn new(
         file_reader: Option<Either<'a>>,
         row_iter: Option<ReaderIter>,
@@ -663,7 +663,7 @@ impl<'a> RowIter<'a> {
         }
     }
 
-    /// Creates iterator of [`Row`](crate::record::api::Row)s for all row groups in a
+    /// Creates iterator of [`Row`](crate::record::Row)s for all row groups in a
     /// file.
     pub fn from_file(proj: Option<Type>, reader: &'a FileReader) -> Result<Self> {
         let either = Either::Left(reader);
@@ -675,7 +675,7 @@ impl<'a> RowIter<'a> {
         Ok(Self::new(Some(either), None, descr))
     }
 
-    /// Creates iterator of [`Row`](crate::record::api::Row)s for a specific row group.
+    /// Creates iterator of [`Row`](crate::record::Row)s for a specific row group.
     pub fn from_row_group(
         proj: Option<Type>,
         reader: &'a RowGroupReader,
@@ -689,7 +689,7 @@ impl<'a> RowIter<'a> {
         Ok(Self::new(None, Some(row_iter), descr))
     }
 
-    /// Creates a iterator of [`Row`](crate::record::api::Row)s from a
+    /// Creates a iterator of [`Row`](crate::record::Row)s from a
     /// [`FileReader`](crate::file::reader::FileReader) using the full file schema.
     pub fn from_file_into(reader: Box<FileReader>) -> Self {
         let either = Either::Right(reader);
@@ -702,7 +702,7 @@ impl<'a> RowIter<'a> {
         Self::new(Some(either), None, descr)
     }
 
-    /// Tries to create a iterator of [`Row`](crate::record::api::Row)s using projections.
+    /// Tries to create a iterator of [`Row`](crate::record::Row)s using projections.
     /// Returns a error if a file reader is not the source of this iterator.
     ///
     /// The Projected schema can be a subset of or equal to the file schema,
@@ -784,7 +784,7 @@ impl<'a> Iterator for RowIter<'a> {
     }
 }
 
-/// Internal iterator of [`Row`](crate::record::api::Row)s for a reader.
+/// Internal iterator of [`Row`](crate::record::Row)s for a reader.
 pub struct ReaderIter {
     root_reader: Reader,
     records_left: usize,