You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/03/13 01:38:23 UTC

[arrow] branch master updated: ARROW-4335: [C++] Better document sparse tensor support

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f83a282  ARROW-4335: [C++] Better document sparse tensor support
f83a282 is described below

commit f83a2822db510d589c604904de264104eef04fd6
Author: Kenta Murata <mr...@mrkn.jp>
AuthorDate: Tue Mar 12 20:37:54 2019 -0500

    ARROW-4335: [C++] Better document sparse tensor support
    
    I wrote descriptions for sparse tensor classes.
    
    Author: Kenta Murata <mr...@mrkn.jp>
    
    Closes #3810 from mrkn/sparse_tensor_doc and squashes the following commits:
    
    9c4fe1422 <Kenta Murata> Fix incorrect English
    522a982f1 <Kenta Murata> Modify comments following review comments
    83cab0377 <Kenta Murata>  Put SparseIndexBase<> class in internal namespace
    6cd13e2eb <Kenta Murata>  Write descriptions of sparse tensor
---
 cpp/src/arrow/sparse_tensor.h | 69 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index ded3a6d..e622245 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -29,13 +29,18 @@ namespace arrow {
 // ----------------------------------------------------------------------
 // SparseIndex class
 
-/// \brief EXPERIMENTAL: Sparse tensor format enumeration
 struct SparseTensorFormat {
+  /// EXPERIMENTAL: The index format type of SparseTensor
   enum type { COO, CSR };
 };
 
-/// \brief EXPERIMENTAL: The base class for representing index of non-zero
-/// values in sparse tensor
+/// \brief EXPERIMENTAL: The base class for the index of a sparse tensor
+///
+/// SparseIndex describes where the non-zero elements are within a SparseTensor.
+///
+/// There are several ways to represent this.  The format_id is used to
+/// distinguish what kind of representation is used.  Each possible value of
+/// format_id must have only one corresponding concrete subclass of SparseIndex.
 class ARROW_EXPORT SparseIndex {
  public:
   explicit SparseIndex(SparseTensorFormat::type format_id, int64_t non_zero_length)
@@ -43,9 +48,14 @@ class ARROW_EXPORT SparseIndex {
 
   virtual ~SparseIndex() = default;
 
+  /// \brief Return the identifier of the format type
   SparseTensorFormat::type format_id() const { return format_id_; }
+
+  /// \brief Return the number of non-zero values in the sparse tensor related
+  /// to this sparse index
   int64_t non_zero_length() const { return non_zero_length_; }
 
+  /// \brief Return the string representation of the sparse index
   virtual std::string ToString() const = 0;
 
  protected:
@@ -53,18 +63,23 @@ class ARROW_EXPORT SparseIndex {
   int64_t non_zero_length_;
 };
 
+namespace internal {
 template <typename SparseIndexType>
 class SparseIndexBase : public SparseIndex {
  public:
   explicit SparseIndexBase(int64_t non_zero_length)
       : SparseIndex(SparseIndexType::format_id, non_zero_length) {}
 };
+}  // namespace internal
 
 // ----------------------------------------------------------------------
 // SparseCOOIndex class
 
-/// \brief EXPERIMENTAL: The index data for COO sparse tensor
-class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
+/// \brief EXPERIMENTAL: The index data for a COO sparse tensor
+///
+/// A COO sparse index manages the location of its non-zero values by their
+/// coordinates.
+class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase<SparseCOOIndex> {
  public:
   using CoordsTensor = NumericTensor<Int64Type>;
 
@@ -73,10 +88,13 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
   // Constructor with a column-major NumericTensor
   explicit SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords);
 
+  /// \brief Return a tensor that has the coordinates of the non-zero values
   const std::shared_ptr<CoordsTensor>& indices() const { return coords_; }
 
+  /// \brief Return a string representation of the sparse index
   std::string ToString() const override;
 
+  /// \brief Return whether the COO indices are equal
   bool Equals(const SparseCOOIndex& other) const {
     return indices()->Equals(*other.indices());
   }
@@ -88,8 +106,19 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
 // ----------------------------------------------------------------------
 // SparseCSRIndex class
 
-/// \brief EXPERIMENTAL: The index data for CSR sparse matrix
-class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
+/// \brief EXPERIMENTAL: The index data for a CSR sparse matrix
+///
+/// A CSR sparse index manages the location of its non-zero values by two
+/// vectors.
+///
+/// The first vector, called indptr, represents the range of the rows; the i-th
+/// row spans from indptr[i] to indptr[i+1] in the corresponding value vector.
+/// So the length of an indptr vector is the number of rows + 1.
+///
+/// The other vector, called indices, represents the column indices of the
+/// corresponding non-zero values.  So the length of an indices vector is the
+/// same as the number of non-zero values.
+class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase<SparseCSRIndex> {
  public:
   using IndexTensor = NumericTensor<Int64Type>;
 
@@ -99,11 +128,16 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
   explicit SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
                           const std::shared_ptr<IndexTensor>& indices);
 
+  /// \brief Return a 1D tensor of the indptr vector
   const std::shared_ptr<IndexTensor>& indptr() const { return indptr_; }
+
+  /// \brief Return a 1D tensor of the indices vector
   const std::shared_ptr<IndexTensor>& indices() const { return indices_; }
 
+  /// \brief Return a string representation of the sparse index
   std::string ToString() const override;
 
+  /// \brief Return whether the CSR indices are equal
   bool Equals(const SparseCSRIndex& other) const {
     return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices());
   }
@@ -123,32 +157,45 @@ class ARROW_EXPORT SparseTensor {
 
   SparseTensorFormat::type format_id() const { return sparse_index_->format_id(); }
 
+  /// \brief Return the value type of the sparse tensor
   std::shared_ptr<DataType> type() const { return type_; }
+
+  /// \brief Return a buffer that contains the value vector of the sparse tensor
   std::shared_ptr<Buffer> data() const { return data_; }
 
+  /// \brief Return an immutable raw data pointer
   const uint8_t* raw_data() const { return data_->data(); }
+
+  /// \brief Return a mutable raw data pointer
   uint8_t* raw_mutable_data() const { return data_->mutable_data(); }
 
+  /// \brief Return the shape vector of the sparse tensor
   const std::vector<int64_t>& shape() const { return shape_; }
 
+  /// \brief Return the sparse index of the sparse tensor
   const std::shared_ptr<SparseIndex>& sparse_index() const { return sparse_index_; }
 
+  /// \brief Return the number of dimensions of the sparse tensor
   int ndim() const { return static_cast<int>(shape_.size()); }
 
+  /// \brief Return a vector of dimension names
   const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+  /// \brief Return the name of the i-th dimension
   const std::string& dim_name(int i) const;
 
-  /// Total number of value cells in the sparse tensor
+  /// \brief Total number of value cells in the sparse tensor
   int64_t size() const;
 
-  /// Return true if the underlying data buffer is mutable
+  /// \brief Return true if the underlying data buffer is mutable
   bool is_mutable() const { return data_->is_mutable(); }
 
-  /// Total number of non-zero cells in the sparse tensor
+  /// \brief Total number of non-zero cells in the sparse tensor
   int64_t non_zero_length() const {
     return sparse_index_ ? sparse_index_->non_zero_length() : 0;
   }
 
+  /// \brief Return whether sparse tensors are equal
   bool Equals(const SparseTensor& other) const;
 
  protected:
@@ -163,7 +210,7 @@ class ARROW_EXPORT SparseTensor {
   std::vector<int64_t> shape_;
   std::shared_ptr<SparseIndex> sparse_index_;
 
-  /// These names are optional
+  // These names are optional
   std::vector<std::string> dim_names_;
 };