You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/11/27 19:09:53 UTC

[GitHub] eric-haibin-lin commented on a change in pull request #13392: add csr sample op

eric-haibin-lin commented on a change in pull request #13392: add csr sample op
URL: https://github.com/apache/incubator-mxnet/pull/13392#discussion_r236785489
 
 

 ##########
 File path: src/operator/contrib/dgl_graph.cc
 ##########
 @@ -35,6 +35,833 @@
 namespace mxnet {
 namespace op {
 
+typedef int64_t dgl_id_t;
+
+////////////////////////////// Graph Sampling ///////////////////////////////
+
+unsigned int seed = 123;
+
+/*
+ * This is used for BFS traversal
+ */
+struct ver_node {
+  dgl_id_t vertex_id;
+  int level;
+};
+
+/*
+ * ArrayHeap is used to sample elements from vector
+ */
+class ArrayHeap {
+ public:
+  // ctor & dctor
+  explicit ArrayHeap(const std::vector<float>& prob) {
+    this->vec_size = prob.size();
+    this->bit_len = ceil(log2(vec_size));
+    this->limit = 1 << bit_len;
+    // allocate twice the size
+    this->heap.resize(limit << 1, 0);
+    // allocate the leaves
+    for (int i = limit; i < vec_size+limit; ++i) {
+      heap[i] = prob[i-limit];
+    }
+    // iterate up the tree (this is O(m))
+    for (int i = bit_len-1; i >= 0; --i) {
+      for (int j = (1 << i); j < (1 << (i + 1)); ++j) {
+        heap[j] = heap[j << 1] + heap[(j << 1) + 1];
+      }
+    }
+  }
+  ~ArrayHeap() {}
+
+  /*
+   * Remove term from index (this costs O(log m) steps)
+   */
+  void Delete(size_t index) {
+    size_t i = index + limit;
+    float w = heap[i];
+    for (int j = bit_len; j >= 0; --j) {
+      heap[i] -= w;
+      i = i >> 1;
+    }
+  }
+
+  /*
+   * Add value w to index (this costs O(log m) steps)
+   */
+  void Add(size_t index, float w) {
+    size_t i = index + limit;
+    for (int j = bit_len; j >= 0; --j) {
+      heap[i] += w;
+      i = i >> 1;
+    }
+  }
+
+  /*
+   * Sample from arrayHeap
+   */
+  size_t Sample() {
+    float xi = heap[1] * (rand_r(&seed)%100/101.0);
+    int i = 1;
+    while (i < limit) {
+      i = i << 1;
+      if (xi >= heap[i]) {
+        xi -= heap[i];
+        i += 1;
+      }
+    }
+    return i - limit;
+  }
+
+  /*
+   * Sample a vector by given the size n
+   */
+  void SampleWithoutReplacement(size_t n, std::vector<size_t>* samples) {
+    // sample n elements
+    for (size_t i = 0; i < n; ++i) {
+      samples->at(i) = this->Sample();
+      this->Delete(samples->at(i));
+    }
+  }
+
+ private:
+  int vec_size;  // sample size
 
 Review comment:
   Can we use underscore to distinguish normal variable and private ones, to improve readability? `vec_size` -> `vec_size_`

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services