You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/12/05 05:03:37 UTC

[GitHub] eric-haibin-lin commented on a change in pull request #8922: fix a bug in sparse batch loader when batch size is extremely large

eric-haibin-lin commented on a change in pull request #8922: fix a bug in sparse batch loader when batch size is extremely large
URL: https://github.com/apache/incubator-mxnet/pull/8922#discussion_r154847902
 
 

 ##########
 File path: src/io/iter_sparse_batchloader.h
 ##########
 @@ -157,44 +140,100 @@ class SparseBatchLoader : public BatchLoader, public SparseIIterator<TBlobBatch>
       return true;
     }
     // label indptr
-    if (i == label_indptr_offset && label_stype_ == kCSRStorage && data_stype_ == kCSRStorage) {
+    if (i == label_indptr_offset && label_stype_ == kCSRStorage &&
+        data_stype_ == kCSRStorage) {
       return true;
     }
     return false;
   }
 
   // initialize the data holder by using from the batch
-  inline void InitDataFromBatch() {
+  inline void InitData(const DataInst& first_batch) {
     CHECK(data_stype_ == kCSRStorage || label_stype_ == kCSRStorage);
-    CHECK_GT(inst_cache_.size(), 0);
     out_.data.clear();
     data_.clear();
     offsets_.clear();
 
-    size_t total_size = inst_cache_[0].data.size();
+    size_t total_size = first_batch.data.size();
     data_.resize(total_size);
     offsets_.resize(total_size, 0);
-    std::vector<size_t> vec_sizes(total_size, 0);
-    // accumulate the memory required for a batch
+    // tensor buffer sizes
+    std::vector<size_t> buff_sizes(total_size, 0);
+    dtypes_.resize(total_size);
+    out_.data.resize(total_size);
+    // estimate the memory required for a batch
     for (size_t i = 0; i < total_size; ++i) {
       size_t size = 0;
-      // vec_size for indptr
+      // shape for indptr
       if (IsIndPtr(i)) {
-        size = param_.batch_size + 1;
+        buff_sizes[i] = param_.batch_size + 1;
       } else {
-        for (const auto &d : inst_cache_) size += d.data[i].shape_.Size();
+        // estimated the size for the whole batch based on the first instance
+        buff_sizes[i] = first_batch.data[i].Size() * param_.batch_size;
       }
-      vec_sizes[i] = size;
+      dtypes_[i] = first_batch.data[i].type_flag_;
     }
 
-    CHECK_EQ(vec_sizes[0], vec_sizes[1]);
+    CHECK_EQ(buff_sizes[0], buff_sizes[1]);
+    // allocate buffer
     for (size_t i = 0; i < total_size; ++i) {
-      int src_type_flag = inst_cache_[0].data[i].type_flag_;
       // init object attributes
-      TShape dst_shape(mshadow::Shape1(vec_sizes[i]));
-      data_[i].resize(mshadow::Shape1(vec_sizes[i]), src_type_flag);
+      TShape dst_shape(mshadow::Shape1(buff_sizes[i]));
+      data_[i].resize(mshadow::Shape1(buff_sizes[i]), dtypes_[i]);
       CHECK(data_[i].dptr_ != nullptr);
-      out_.data.push_back(TBlob(data_[i].dptr_, dst_shape, cpu::kDevMask, src_type_flag));
+    }
+  }
+
+  /* \brief set the shape of the outputs based on actual shapes */
+  inline void SetOutputShape() {
+    for (size_t i = 0; i < out_.data.size(); i++) {
+      out_.data[i] = TBlob(data_[i].dptr_, mshadow::Shape1(offsets_[i]),
+                           Context::kCPU, dtypes_[i]);
+    }
+  }
+
+  /* \brief increase the size of i-th data buffer by a factor of 2, while retaining the content */
+  inline void ResizeBuffer(size_t src_size, size_t i) {
+    MSHADOW_TYPE_SWITCH(data_[i].type_flag_, DType, {
+      TBlobContainer temp;
+      temp.resize(mshadow::Shape1(src_size), dtypes_[i]);
+      mshadow::Copy(temp.get<cpu, 1, DType>(), data_[i].get<cpu, 1, DType>().Slice(0, src_size));
+      // increase the size of space exponentially
+      size_t capacity = data_[i].Size();
+      capacity *= 2;
+      data_[i] = TBlobContainer();
+      data_[i].resize(mshadow::Shape1(capacity), dtypes_[i]);
+      // copy back
+      mshadow::Copy(data_[i].get<cpu, 1, DType>().Slice(0, src_size), temp.get<cpu, 1, DType>());
+    });
+  }
+
+  /* \brief copy the data instance to data buffer */
+  void CopyData(const DataInst& d, const size_t top) {
+    int64_t unit_size = 0;
+    out_.inst_index[top] = d.index;
+    for (size_t i = 0; i < d.data.size(); ++i) {
+      if (!IsIndPtr(i)) {
 
 Review comment:
   usually at most 6

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services