You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fs...@apache.org on 2020/05/01 17:46:28 UTC

[arrow] branch master updated: ARROW-6501: [C++] Remove non_zero_length_ field from SparseIndex class

This is an automated email from the ASF dual-hosted git repository.

fsaintjacques pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c63b26d  ARROW-6501: [C++] Remove non_zero_length_ field from SparseIndex class
c63b26d is described below

commit c63b26debd626fbf5f79802100c5e58eef9a1bd1
Author: Kenta Murata <mr...@mrkn.jp>
AuthorDate: Fri May 1 13:46:07 2020 -0400

    ARROW-6501: [C++] Remove non_zero_length_ field from SparseIndex class
    
    This field is essentially needless, and may be obstacle to the future improvement of sparse tensors, such as adding value inserting feature.
    
    Closes #7079 from mrkn/ARROW-6501
    
    Authored-by: Kenta Murata <mr...@mrkn.jp>
    Signed-off-by: François Saint-Jacques <fs...@gmail.com>
---
 cpp/src/arrow/sparse_tensor.cc |  7 ++-----
 cpp/src/arrow/sparse_tensor.h  | 27 +++++++++++++++++----------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 8da6a8b..cbe7285 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -326,7 +326,7 @@ Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
 
 // Constructor with a contiguous NumericTensor
 SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<Tensor>& coords)
-    : SparseIndexBase(coords->shape()[0]), coords_(coords) {
+    : SparseIndexBase(), coords_(coords) {
   ARROW_CHECK_OK(
       CheckSparseCOOIndexValidity(coords_->type(), coords_->shape(), coords_->strides()));
 }
@@ -428,10 +428,7 @@ Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
 SparseCSFIndex::SparseCSFIndex(const std::vector<std::shared_ptr<Tensor>>& indptr,
                                const std::vector<std::shared_ptr<Tensor>>& indices,
                                const std::vector<int64_t>& axis_order)
-    : SparseIndexBase(indices.back()->size()),
-      indptr_(indptr),
-      indices_(indices),
-      axis_order_(axis_order) {
+    : SparseIndexBase(), indptr_(indptr), indices_(indices), axis_order_(axis_order) {
   ARROW_CHECK_OK(CheckSparseCSFIndexValidity(
       indptr_.front()->type(), indices_.front()->type(), indptr_.size(), indices_.size(),
       indptr_.back()->shape(), indices_.back()->shape(), axis_order_.size()));
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index e5624f1..c121067 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -64,8 +64,7 @@ struct SparseTensorFormat {
 /// format_id must have only one corresponding concrete subclass of SparseIndex.
 class ARROW_EXPORT SparseIndex {
  public:
-  explicit SparseIndex(SparseTensorFormat::type format_id, int64_t non_zero_length)
-      : format_id_(format_id), non_zero_length_(non_zero_length) {}
+  explicit SparseIndex(SparseTensorFormat::type format_id) : format_id_(format_id) {}
 
   virtual ~SparseIndex() = default;
 
@@ -74,7 +73,7 @@ class ARROW_EXPORT SparseIndex {
 
   /// \brief Return the number of non zero values in the sparse tensor related
   /// to this sparse index
-  int64_t non_zero_length() const { return non_zero_length_; }
+  virtual int64_t non_zero_length() const = 0;
 
   /// \brief Return the string representation of the sparse index
   virtual std::string ToString() const = 0;
@@ -82,16 +81,14 @@ class ARROW_EXPORT SparseIndex {
   virtual Status ValidateShape(const std::vector<int64_t>& shape) const;
 
  protected:
-  SparseTensorFormat::type format_id_;
-  int64_t non_zero_length_;
+  const SparseTensorFormat::type format_id_;
 };
 
 namespace internal {
 template <typename SparseIndexType>
 class SparseIndexBase : public SparseIndex {
  public:
-  explicit SparseIndexBase(int64_t non_zero_length)
-      : SparseIndex(SparseIndexType::format_id, non_zero_length) {}
+  SparseIndexBase() : SparseIndex(SparseIndexType::format_id) {}
 };
 }  // namespace internal
 
@@ -131,6 +128,10 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase<SparseCOOIn
   /// logical value at those coordinates should be found at physical index `i`.
   const std::shared_ptr<Tensor>& indices() const { return coords_; }
 
+  /// \brief Return the number of non zero values in the sparse tensor related
+  /// to this sparse index
+  int64_t non_zero_length() const override { return coords_->shape()[0]; }
+
   /// \brief Return a string representation of the sparse index
   std::string ToString() const override;
 
@@ -232,9 +233,7 @@ class SparseCSXIndex : public SparseIndexBase<SparseIndexType> {
   /// \brief Construct SparseCSXIndex from two index vectors
   explicit SparseCSXIndex(const std::shared_ptr<Tensor>& indptr,
                           const std::shared_ptr<Tensor>& indices)
-      : SparseIndexBase<SparseIndexType>(indices->shape()[0]),
-        indptr_(indptr),
-        indices_(indices) {
+      : SparseIndexBase<SparseIndexType>(), indptr_(indptr), indices_(indices) {
     CheckSparseCSXIndexValidity(indptr_->type(), indices_->type(), indptr_->shape(),
                                 indices_->shape(), SparseIndexType::kTypeName);
   }
@@ -245,6 +244,10 @@ class SparseCSXIndex : public SparseIndexBase<SparseIndexType> {
   /// \brief Return a 1D tensor of indices vector
   const std::shared_ptr<Tensor>& indices() const { return indices_; }
 
+  /// \brief Return the number of non zero values in the sparse tensor related
+  /// to this sparse index
+  int64_t non_zero_length() const override { return indices_->shape()[0]; }
+
   /// \brief Return a string representation of the sparse index
   std::string ToString() const override {
     return std::string(SparseIndexType::kTypeName);
@@ -391,6 +394,10 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
   /// \brief Return a 1D vector specifying the order of axes
   const std::vector<int64_t>& axis_order() const { return axis_order_; }
 
+  /// \brief Return the number of non zero values in the sparse tensor related
+  /// to this sparse index
+  int64_t non_zero_length() const override { return indices_.back()->shape()[0]; }
+
   /// \brief Return a string representation of the sparse index
   std::string ToString() const override;