You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/12/05 04:22:33 UTC

[GitHub] cjolivier01 commented on a change in pull request #8922: fix a bug in sparse batch loader when batch size is extremely large

cjolivier01 commented on a change in pull request #8922: fix a bug in sparse batch loader when batch size is extremely large
URL: https://github.com/apache/incubator-mxnet/pull/8922#discussion_r154843916
 
 

 ##########
 File path: src/io/iter_sparse_batchloader.h
 ##########
 @@ -157,44 +140,100 @@ class SparseBatchLoader : public BatchLoader, public SparseIIterator<TBlobBatch>
       return true;
     }
     // label indptr
-    if (i == label_indptr_offset && label_stype_ == kCSRStorage && data_stype_ == kCSRStorage) {
+    if (i == label_indptr_offset && label_stype_ == kCSRStorage &&
+        data_stype_ == kCSRStorage) {
       return true;
     }
     return false;
   }
 
   // initialize the data holder by using from the batch
-  inline void InitDataFromBatch() {
+  inline void InitData(const DataInst& first_batch) {
     CHECK(data_stype_ == kCSRStorage || label_stype_ == kCSRStorage);
-    CHECK_GT(inst_cache_.size(), 0);
     out_.data.clear();
     data_.clear();
     offsets_.clear();
 
-    size_t total_size = inst_cache_[0].data.size();
+    size_t total_size = first_batch.data.size();
     data_.resize(total_size);
     offsets_.resize(total_size, 0);
-    std::vector<size_t> vec_sizes(total_size, 0);
-    // accumulate the memory required for a batch
+    // tensor buffer sizes
+    std::vector<size_t> buff_sizes(total_size, 0);
+    dtypes_.resize(total_size);
+    out_.data.resize(total_size);
 
 Review comment:
   How big can total_size get?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services