You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ha...@apache.org on 2018/11/30 02:02:26 UTC

[incubator-mxnet] branch master updated: Add DGL subgraph sampling op (#13392)

This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new c72a38b  Add DGL subgraph sampling op (#13392)
c72a38b is described below

commit c72a38b9afef8dc7261972f7cc43fffb13e7853b
Author: Chao Ma <mc...@gmail.com>
AuthorDate: Fri Nov 30 10:02:09 2018 +0800

    Add DGL subgraph sampling op (#13392)
    
    * add csr sample op
    
    * fix compile error in some platform
    
    * update
    
    * update openmp
    
    * speedup sampling
    
    * update csr
    
    * update csr
    
    * update time seed
    
    * update
    
    * fix compiler error
    
    * update doc
    
    * fix ci error
---
 src/operator/contrib/dgl_graph.cc       | 872 +++++++++++++++++++++++++++++++-
 tests/python/unittest/test_dgl_graph.py | 122 +++++
 2 files changed, 991 insertions(+), 3 deletions(-)

diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc
index d9bcdd4..1bb47b8 100644
--- a/src/operator/contrib/dgl_graph.cc
+++ b/src/operator/contrib/dgl_graph.cc
@@ -32,6 +32,875 @@
 namespace mxnet {
 namespace op {
 
+typedef int64_t dgl_id_t;
+
+////////////////////////////// Graph Sampling ///////////////////////////////
+
+/*
+ * ArrayHeap is used to sample elements from vector
+ */
+class ArrayHeap {
+ public:
+  explicit ArrayHeap(const std::vector<float>& prob) {
+    vec_size_ = prob.size();
+    bit_len_ = ceil(log2(vec_size_));
+    limit_ = 1 << bit_len_;
+    // allocate twice the size
+    heap_.resize(limit_ << 1, 0);
+    // allocate the leaves
+    for (int i = limit_; i < vec_size_+limit_; ++i) {
+      heap_[i] = prob[i-limit_];
+    }
+    // iterate up the tree (this is O(m))
+    for (int i = bit_len_-1; i >= 0; --i) {
+      for (int j = (1 << i); j < (1 << (i + 1)); ++j) {
+        heap_[j] = heap_[j << 1] + heap_[(j << 1) + 1];
+      }
+    }
+  }
+  ~ArrayHeap() {}
+
+  /*
+   * Remove term from index (this costs O(log m) steps)
+   */
+  void Delete(size_t index) {
+    size_t i = index + limit_;
+    float w = heap_[i];
+    for (int j = bit_len_; j >= 0; --j) {
+      heap_[i] -= w;
+      i = i >> 1;
+    }
+  }
+
+  /*
+   * Add value w to index (this costs O(log m) steps)
+   */
+  void Add(size_t index, float w) {
+    size_t i = index + limit_;
+    for (int j = bit_len_; j >= 0; --j) {
+      heap_[i] += w;
+      i = i >> 1;
+    }
+  }
+
+  /*
+   * Sample from arrayHeap
+   */
+  size_t Sample(unsigned int* seed) {
+    float xi = heap_[1] * (rand_r(seed)%100/101.0);
+    int i = 1;
+    while (i < limit_) {
+      i = i << 1;
+      if (xi >= heap_[i]) {
+        xi -= heap_[i];
+        i += 1;
+      }
+    }
+    return i - limit_;
+  }
+
+  /*
+   * Sample a vector by given the size n
+   */
+  void SampleWithoutReplacement(size_t n, std::vector<size_t>* samples, unsigned int* seed) {
+    // sample n elements
+    for (size_t i = 0; i < n; ++i) {
+      samples->at(i) = this->Sample(seed);
+      this->Delete(samples->at(i));
+    }
+  }
+
+ private:
+  int vec_size_;  // sample size
+  int bit_len_;   // bit size
+  int limit_;
+  std::vector<float> heap_;
+};
+
+struct NeighborSampleParam : public dmlc::Parameter<NeighborSampleParam> {
+  int num_args;
+  dgl_id_t num_hops;
+  dgl_id_t num_neighbor;
+  dgl_id_t max_num_vertices;
+  DMLC_DECLARE_PARAMETER(NeighborSampleParam) {
+    DMLC_DECLARE_FIELD(num_args).set_lower_bound(2)
+    .describe("Number of input NDArray.");
+    DMLC_DECLARE_FIELD(num_hops)
+      .set_default(1)
+      .describe("Number of hops.");
+    DMLC_DECLARE_FIELD(num_neighbor)
+      .set_default(2)
+      .describe("Number of neighbor.");
+    DMLC_DECLARE_FIELD(max_num_vertices)
+      .set_default(100)
+      .describe("Max number of vertices.");
+  }
+};
+
+DMLC_REGISTER_PARAMETER(NeighborSampleParam);
+
+/*
+ * Check uniform Storage Type
+ */
+static bool CSRNeighborUniformSampleStorageType(const nnvm::NodeAttrs& attrs,
+                                                const int dev_mask,
+                                                DispatchMode* dispatch_mode,
+                                                std::vector<int> *in_attrs,
+                                                std::vector<int> *out_attrs) {
+  const NeighborSampleParam& params = nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  size_t num_subgraphs = params.num_args - 1;
+  CHECK_EQ(out_attrs->size(), 3 * num_subgraphs);
+
+  // input[0] is csr_graph
+  CHECK_EQ(in_attrs->at(0), mxnet::kCSRStorage);
+  // the rest input ndarray is seed_vector
+  for (size_t i = 0; i < num_subgraphs; i++)
+    CHECK_EQ(in_attrs->at(1 + i), mxnet::kDefaultStorage);
+
+  bool success = true;
+  // sample_id
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    if (!type_assign(&(*out_attrs)[i], mxnet::kDefaultStorage)) {
+      success = false;
+    }
+  }
+  // sub_graph
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    if (!type_assign(&(*out_attrs)[i + num_subgraphs], mxnet::kCSRStorage)) {
+      success = false;
+    }
+  }
+  // sub_layer
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    if (!type_assign(&(*out_attrs)[i + 2*num_subgraphs], mxnet::kDefaultStorage)) {
+      success = false;
+    }
+  }
+
+  *dispatch_mode = DispatchMode::kFComputeEx;
+
+  return success;
+}
+
+/*
+ * Check non-uniform Storage Type
+ */
+static bool CSRNeighborNonUniformSampleStorageType(const nnvm::NodeAttrs& attrs,
+                                                   const int dev_mask,
+                                                   DispatchMode* dispatch_mode,
+                                                   std::vector<int> *in_attrs,
+                                                   std::vector<int> *out_attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  size_t num_subgraphs = params.num_args - 2;
+  CHECK_EQ(out_attrs->size(), 4 * num_subgraphs);
+
+  // input[0] is csr_graph
+  CHECK_EQ(in_attrs->at(0), mxnet::kCSRStorage);
+  // input[1] is probability
+  CHECK_EQ(in_attrs->at(1), mxnet::kDefaultStorage);
+
+  // the rest input ndarray is seed_vector
+  for (size_t i = 0; i < num_subgraphs; i++)
+    CHECK_EQ(in_attrs->at(2 + i), mxnet::kDefaultStorage);
+
+  bool success = true;
+  // sample_id
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    if (!type_assign(&(*out_attrs)[i], mxnet::kDefaultStorage)) {
+      success = false;
+    }
+  }
+  // sub_graph
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    if (!type_assign(&(*out_attrs)[i + num_subgraphs], mxnet::kCSRStorage)) {
+      success = false;
+    }
+  }
+  // sub_probability
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    if (!type_assign(&(*out_attrs)[i + 2*num_subgraphs], mxnet::kDefaultStorage)) {
+      success = false;
+    }
+  }
+  // sub_layer
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    if (!type_assign(&(*out_attrs)[i + 3*num_subgraphs], mxnet::kDefaultStorage)) {
+      success = false;
+    }
+  }
+
+  *dispatch_mode = DispatchMode::kFComputeEx;
+
+  return success;
+}
+
+/*
+ * Check uniform Shape
+ */
+static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs,
+                                          std::vector<TShape> *in_attrs,
+                                          std::vector<TShape> *out_attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  size_t num_subgraphs = params.num_args - 1;
+  CHECK_EQ(out_attrs->size(), 3 * num_subgraphs);
+  // input[0] is csr graph
+  CHECK_EQ(in_attrs->at(0).ndim(), 2U);
+  CHECK_EQ(in_attrs->at(0)[0], in_attrs->at(0)[1]);
+
+  // the rest input ndarray is seed vector
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    CHECK_EQ(in_attrs->at(1 + i).ndim(), 1U);
+  }
+
+  // Output
+  bool success = true;
+  TShape out_shape(1);
+  // We use the last element to store the actual
+  // number of vertices in the subgraph.
+  out_shape[0] = params.max_num_vertices + 1;
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, i, out_shape);
+    success = success &&
+              out_attrs->at(i).ndim() != 0U &&
+              out_attrs->at(i).Size() != 0U;
+  }
+  // sub_csr
+  TShape out_csr_shape(2);
+  out_csr_shape[0] = params.max_num_vertices;
+  out_csr_shape[1] = in_attrs->at(0)[1];
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, out_csr_shape);
+    success = success &&
+              out_attrs->at(i + num_subgraphs).ndim() != 0U &&
+              out_attrs->at(i + num_subgraphs).Size() != 0U;
+  }
+  // sub_layer
+  TShape out_layer_shape(1);
+  out_layer_shape[0] = params.max_num_vertices;
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_layer_shape);
+    success = success &&
+              out_attrs->at(i + 2*num_subgraphs).ndim() != 0U &&
+              out_attrs->at(i + 2*num_subgraphs).Size() != 0U;
+  }
+
+  return success;
+}
+
+/*
+ * Check non-uniform Shape
+ */
+static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs,
+                                             std::vector<TShape> *in_attrs,
+                                             std::vector<TShape> *out_attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  size_t num_subgraphs = params.num_args - 2;
+  CHECK_EQ(out_attrs->size(), 4 * num_subgraphs);
+  // input[0] is csr graph
+  CHECK_EQ(in_attrs->at(0).ndim(), 2U);
+  CHECK_EQ(in_attrs->at(0)[0], in_attrs->at(0)[1]);
+
+  // input[1] is probability
+  CHECK_EQ(in_attrs->at(1).ndim(), 1U);
+
+  // the rest ndarray is seed vector
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    CHECK_EQ(in_attrs->at(2 + i).ndim(), 1U);
+  }
+
+  // Output
+  bool success = true;
+  TShape out_shape(1);
+  // We use the last element to store the actual
+  // number of vertices in the subgraph.
+  out_shape[0] = params.max_num_vertices + 1;
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, i, out_shape);
+    success = success &&
+              out_attrs->at(i).ndim() != 0U &&
+              out_attrs->at(i).Size() != 0U;
+  }
+  // sub_csr
+  TShape out_csr_shape(2);
+  out_csr_shape[0] = params.max_num_vertices;
+  out_csr_shape[1] = in_attrs->at(0)[1];
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, out_csr_shape);
+    success = success &&
+              out_attrs->at(i + num_subgraphs).ndim() != 0U &&
+              out_attrs->at(i + num_subgraphs).Size() != 0U;
+  }
+  // sub_probability
+  TShape out_prob_shape(1);
+  out_prob_shape[0] = params.max_num_vertices;
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_prob_shape);
+    success = success &&
+              out_attrs->at(i + 2*num_subgraphs).ndim() != 0U &&
+              out_attrs->at(i + 2*num_subgraphs).Size() != 0U;
+  }
+  // sub_layer
+  TShape out_layer_shape(1);
+  out_layer_shape[0] = params.max_num_vertices;
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, i + 3*num_subgraphs, out_prob_shape);
+    success = success &&
+              out_attrs->at(i + 3*num_subgraphs).ndim() != 0U &&
+              out_attrs->at(i + 3*num_subgraphs).Size() != 0U;
+  }
+
+  return success;
+}
+
+/*
+ * Check uniform Type
+ */
+static bool CSRNeighborUniformSampleType(const nnvm::NodeAttrs& attrs,
+                                         std::vector<int> *in_attrs,
+                                         std::vector<int> *out_attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  size_t num_subgraphs = params.num_args - 1;
+  CHECK_EQ(out_attrs->size(), 3 * num_subgraphs);
+
+  bool success = true;
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    TYPE_ASSIGN_CHECK(*out_attrs, i, in_attrs->at(1));
+    TYPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, in_attrs->at(0));
+    TYPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, in_attrs->at(1));
+    success = success &&
+               out_attrs->at(i) != -1 &&
+               out_attrs->at(i + num_subgraphs) != -1 &&
+               out_attrs->at(i + 2*num_subgraphs) != -1;
+  }
+
+  return success;
+}
+
+/*
+ * Check non-uniform Type
+ */
+static bool CSRNeighborNonUniformSampleType(const nnvm::NodeAttrs& attrs,
+                                            std::vector<int> *in_attrs,
+                                            std::vector<int> *out_attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  size_t num_subgraphs = params.num_args - 2;
+  CHECK_EQ(out_attrs->size(), 4 * num_subgraphs);
+
+  bool success = true;
+  for (size_t i = 0; i < num_subgraphs; i++) {
+    TYPE_ASSIGN_CHECK(*out_attrs, i, in_attrs->at(2));
+    TYPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, in_attrs->at(0));
+    TYPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, in_attrs->at(1));
+    TYPE_ASSIGN_CHECK(*out_attrs, i + 3*num_subgraphs, in_attrs->at(2));
+    success = success &&
+               out_attrs->at(i) != -1 &&
+               out_attrs->at(i + num_subgraphs) != -1 &&
+               out_attrs->at(i + 2*num_subgraphs) != -1 &&
+               out_attrs->at(i + 3*num_subgraphs) != -1;
+  }
+
+  return success;
+}
+
+/*
+ * Get src vertex and edge id for a destination vertex
+ */
+static void GetSrcList(const dgl_id_t* val_list,
+                       const dgl_id_t* col_list,
+                       const dgl_id_t* indptr,
+                       const dgl_id_t dst_id,
+                       std::vector<dgl_id_t>* src_list,
+                       std::vector<dgl_id_t>* edge_list) {
+  for (dgl_id_t i = *(indptr+dst_id); i < *(indptr+dst_id+1); ++i) {
+    src_list->push_back(col_list[i]);
+    edge_list->push_back(val_list[i]);
+  }
+}
+
+static void RandomSample(size_t set_size,
+                         size_t num,
+                         std::vector<size_t>* out,
+                         unsigned int* seed) {
+  std::unordered_set<size_t> sampled_idxs;
+  while (sampled_idxs.size() < num) {
+    sampled_idxs.insert(rand_r(seed) % set_size);
+  }
+  out->clear();
+  for (auto it = sampled_idxs.begin(); it != sampled_idxs.end(); it++) {
+    out->push_back(*it);
+  }
+}
+
+static void NegateSet(const std::vector<size_t> &idxs,
+                      size_t set_size,
+                      std::vector<size_t>* out) {
+  // idxs must have been sorted.
+  auto it = idxs.begin();
+  size_t i = 0;
+  CHECK_GT(set_size, idxs.back());
+  for (; i < set_size && it != idxs.end(); i++) {
+    if (*it == i) {
+      it++;
+      continue;
+    }
+    out->push_back(i);
+  }
+  for (; i < set_size; i++) {
+    out->push_back(i);
+  }
+}
+
+/*
+ * Uniform sample
+ */
+static void GetUniformSample(const std::vector<dgl_id_t>& ver_list,
+                             const std::vector<dgl_id_t>& edge_list,
+                             const size_t max_num_neighbor,
+                             std::vector<dgl_id_t>* out_ver,
+                             std::vector<dgl_id_t>* out_edge,
+                             unsigned int* seed) {
+  CHECK_EQ(ver_list.size(), edge_list.size());
+  // Copy ver_list to output
+  if (ver_list.size() <= max_num_neighbor) {
+    for (size_t i = 0; i < ver_list.size(); ++i) {
+      out_ver->push_back(ver_list[i]);
+      out_edge->push_back(edge_list[i]);
+    }
+    return;
+  }
+  // If we just sample a small number of elements from a large neighbor list.
+  std::vector<size_t> sorted_idxs;
+  if (ver_list.size() > max_num_neighbor * 2) {
+    sorted_idxs.reserve(max_num_neighbor);
+    RandomSample(ver_list.size(), max_num_neighbor, &sorted_idxs, seed);
+    std::sort(sorted_idxs.begin(), sorted_idxs.end());
+  } else {
+    std::vector<size_t> negate;
+    negate.reserve(ver_list.size() - max_num_neighbor);
+    RandomSample(ver_list.size(), ver_list.size() - max_num_neighbor,
+                 &negate, seed);
+    std::sort(negate.begin(), negate.end());
+    NegateSet(negate, ver_list.size(), &sorted_idxs);
+  }
+  // verify the result.
+  CHECK_EQ(sorted_idxs.size(), max_num_neighbor);
+  for (size_t i = 1; i < sorted_idxs.size(); i++) {
+    CHECK_GT(sorted_idxs[i], sorted_idxs[i - 1]);
+  }
+  for (auto idx : sorted_idxs) {
+    out_ver->push_back(ver_list[idx]);
+    out_edge->push_back(edge_list[idx]);
+  }
+}
+
+/*
+ * Non-uniform sample via ArrayHeap
+ */
+static void GetNonUniformSample(const float* probability,
+                                const std::vector<dgl_id_t>& ver_list,
+                                const std::vector<dgl_id_t>& edge_list,
+                                const size_t max_num_neighbor,
+                                std::vector<dgl_id_t>* out_ver,
+                                std::vector<dgl_id_t>* out_edge,
+                                unsigned int* seed) {
+  CHECK_EQ(ver_list.size(), edge_list.size());
+  // Copy ver_list to output
+  if (ver_list.size() <= max_num_neighbor) {
+    for (size_t i = 0; i < ver_list.size(); ++i) {
+      out_ver->push_back(ver_list[i]);
+      out_edge->push_back(edge_list[i]);
+    }
+    return;
+  }
+  // Make sample
+  std::vector<size_t> sp_index(max_num_neighbor);
+  std::vector<float> sp_prob(ver_list.size());
+  for (size_t i = 0; i < ver_list.size(); ++i) {
+    sp_prob[i] = probability[ver_list[i]];
+  }
+  ArrayHeap arrayHeap(sp_prob);
+  arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index, seed);
+  out_ver->resize(max_num_neighbor);
+  out_edge->resize(max_num_neighbor);
+  for (size_t i = 0; i < max_num_neighbor; ++i) {
+    size_t idx = sp_index[i];
+    out_ver->at(i) = ver_list[idx];
+    out_edge->at(i) = edge_list[idx];
+  }
+  sort(out_ver->begin(), out_ver->end());
+  sort(out_edge->begin(), out_edge->end());
+}
+
+/*
+ * This is used for BFS traversal
+ */
+struct ver_node {
+  dgl_id_t vertex_id;
+  int level;
+};
+
+/*
+ * Used for subgraph sampling
+ */
+struct neigh_list {
+  std::vector<dgl_id_t> neighs;
+  std::vector<dgl_id_t> edges;
+  neigh_list(const std::vector<dgl_id_t> &_neighs,
+             const std::vector<dgl_id_t> &_edges)
+    : neighs(_neighs), edges(_edges) {}
+};
+
+/*
+ * Sample sub-graph from csr graph
+ */
+static void SampleSubgraph(const NDArray &csr,
+                           const NDArray &seed_arr,
+                           const NDArray &sampled_ids,
+                           const NDArray &sub_csr,
+                           float* sub_prob,
+                           const NDArray &sub_layer,
+                           const float* probability,
+                           dgl_id_t num_hops,
+                           dgl_id_t num_neighbor,
+                           dgl_id_t max_num_vertices) {
+  unsigned int time_seed = time(nullptr);
+  size_t num_seeds = seed_arr.shape().Size();
+  CHECK_GE(max_num_vertices, num_seeds);
+
+  const dgl_id_t* val_list = csr.data().dptr<dgl_id_t>();
+  const dgl_id_t* col_list = csr.aux_data(csr::kIdx).dptr<dgl_id_t>();
+  const dgl_id_t* indptr = csr.aux_data(csr::kIndPtr).dptr<dgl_id_t>();
+  const dgl_id_t* seed = seed_arr.data().dptr<dgl_id_t>();
+  dgl_id_t* out = sampled_ids.data().dptr<dgl_id_t>();
+  dgl_id_t* out_layer = sub_layer.data().dptr<dgl_id_t>();
+
+  // BFS traverse the graph and sample vertices
+  dgl_id_t sub_vertices_count = 0;
+  // <vertex_id, layer_id>
+  std::unordered_map<dgl_id_t, int> sub_ver_mp;
+  std::queue<ver_node> node_queue;
+  // add seed vertices
+  for (size_t i = 0; i < num_seeds; ++i) {
+    ver_node node;
+    node.vertex_id = seed[i];
+    node.level = 0;
+    node_queue.push(node);
+  }
+  std::vector<dgl_id_t> tmp_src_list;
+  std::vector<dgl_id_t> tmp_edge_list;
+  std::vector<dgl_id_t> tmp_sampled_src_list;
+  std::vector<dgl_id_t> tmp_sampled_edge_list;
+  std::unordered_map<dgl_id_t, neigh_list> neigh_mp;
+  size_t num_edges = 0;
+  while (!node_queue.empty() &&
+    sub_vertices_count <= max_num_vertices ) {
+    ver_node& cur_node = node_queue.front();
+    dgl_id_t dst_id = cur_node.vertex_id;
+    if (cur_node.level < num_hops) {
+      auto ret = sub_ver_mp.find(dst_id);
+      if (ret != sub_ver_mp.end()) {
+        node_queue.pop();
+        continue;
+      }
+      tmp_src_list.clear();
+      tmp_edge_list.clear();
+      tmp_sampled_src_list.clear();
+      tmp_sampled_edge_list.clear();
+      GetSrcList(val_list,
+                 col_list,
+                 indptr,
+                 dst_id,
+                 &tmp_src_list,
+                 &tmp_edge_list);
+      if (probability == nullptr) {  // uniform-sample
+        GetUniformSample(tmp_src_list,
+                       tmp_edge_list,
+                       num_neighbor,
+                       &tmp_sampled_src_list,
+                       &tmp_sampled_edge_list,
+                       &time_seed);
+      } else {  // non-uniform-sample
+        GetNonUniformSample(probability,
+                       tmp_src_list,
+                       tmp_edge_list,
+                       num_neighbor,
+                       &tmp_sampled_src_list,
+                       &tmp_sampled_edge_list,
+                       &time_seed);
+      }
+      neigh_mp.insert(std::pair<dgl_id_t, neigh_list>(dst_id,
+        neigh_list(tmp_sampled_src_list,
+                   tmp_sampled_edge_list)));
+      num_edges += tmp_sampled_src_list.size();
+      sub_ver_mp[cur_node.vertex_id] = cur_node.level;
+      for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) {
+        auto ret = sub_ver_mp.find(tmp_sampled_src_list[i]);
+        if (ret == sub_ver_mp.end()) {
+          ver_node new_node;
+          new_node.vertex_id = tmp_sampled_src_list[i];
+          new_node.level = cur_node.level + 1;
+          node_queue.push(new_node);
+        }
+      }
+    } else {  // vertex without any neighbor
+      auto ret = sub_ver_mp.find(dst_id);
+      if (ret != sub_ver_mp.end()) {
+        node_queue.pop();
+        continue;
+      }
+      tmp_sampled_src_list.clear();
+      tmp_sampled_edge_list.clear();
+      neigh_mp.insert(std::pair<dgl_id_t, neigh_list>(dst_id,
+        neigh_list(tmp_sampled_src_list,      // empty vector
+                   tmp_sampled_edge_list)));  // empty vector
+      sub_ver_mp[cur_node.vertex_id] = cur_node.level;
+    }
+    sub_vertices_count++;
+    node_queue.pop();
+  }
+
+  // Copy sub_ver_mp to output[0]
+  size_t idx = 0;
+  for (auto& data : sub_ver_mp) {
+    *(out+idx) = data.first;
+    idx++;
+  }
+  size_t num_vertices = sub_ver_mp.size();
+  std::sort(out, out + num_vertices);
+  // The rest data will be set to -1
+  for (dgl_id_t i = idx; i < max_num_vertices; ++i) {
+    *(out+i) = -1;
+  }
+  // The last element stores the actual
+  // number of vertices in the subgraph.
+  out[max_num_vertices] = sub_ver_mp.size();
+  // Copy sub_probability
+  if (sub_prob != nullptr) {
+    for (dgl_id_t i = 0; i < max_num_vertices; ++i) {
+      dgl_id_t idx = out[i];
+      if (idx != -1) {
+        sub_prob[i] = probability[idx];
+      } else {
+        sub_prob[i] = -1;
+      }
+    }
+  }
+  // Copy layer
+  for (dgl_id_t i = 0; i < max_num_vertices; ++i) {
+    dgl_id_t idx = out[i];
+    if (idx != -1) {
+      out_layer[i] = sub_ver_mp[idx];
+    } else {
+      out_layer[i] = -1;
+    }
+  }
+  // Construct sub_csr_graph
+  TShape shape_1(1);
+  TShape shape_2(1);
+  shape_1[0] = num_edges;
+  shape_2[0] = max_num_vertices+1;
+  sub_csr.CheckAndAllocData(shape_1);
+  sub_csr.CheckAndAllocAuxData(csr::kIdx, shape_1);
+  sub_csr.CheckAndAllocAuxData(csr::kIndPtr, shape_2);
+  dgl_id_t* val_list_out = sub_csr.data().dptr<dgl_id_t>();
+  dgl_id_t* col_list_out = sub_csr.aux_data(1).dptr<dgl_id_t>();
+  dgl_id_t* indptr_out = sub_csr.aux_data(0).dptr<dgl_id_t>();
+  indptr_out[0] = 0;
+  size_t collected_nedges = 0;
+  for (size_t i = 0; i < num_vertices; i++) {
+    dgl_id_t dst_id = *(out + i);
+    auto it = neigh_mp.find(dst_id);
+    const auto &edges = it->second.edges;
+    const auto &neighs = it->second.neighs;
+    CHECK_EQ(edges.size(), neighs.size());
+    if (!edges.empty()) {
+      std::copy(edges.begin(), edges.end(), val_list_out + collected_nedges);
+      std::copy(neighs.begin(), neighs.end(), col_list_out + collected_nedges);
+      collected_nedges += edges.size();
+    }
+    indptr_out[i+1] = indptr_out[i] + edges.size();
+  }
+  for (dgl_id_t i = num_vertices+1; i <= max_num_vertices; ++i) {
+    indptr_out[i] = indptr_out[i-1];
+  }
+}
+
+/*
+ * Operator: contrib_csr_neighbor_uniform_sample
+ */
+static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs,
+                                          const OpContext& ctx,
+                                          const std::vector<NDArray>& inputs,
+                                          const std::vector<OpReqType>& req,
+                                          const std::vector<NDArray>& outputs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  int num_subgraphs = inputs.size() - 1;
+  CHECK_EQ(outputs.size(), 3 * num_subgraphs);
+
+#pragma omp parallel for
+  for (int i = 0; i < num_subgraphs; i++) {
+    SampleSubgraph(inputs[0],                     // graph_csr
+                   inputs[i + 1],                 // seed vector
+                   outputs[i],                    // sample_id
+                   outputs[i + 1*num_subgraphs],  // sub_csr
+                   nullptr,                       // sample_id_probability
+                   outputs[i + 2*num_subgraphs],  // sample_id_layer
+                   nullptr,                       // probability
+                   params.num_hops,
+                   params.num_neighbor,
+                   params.max_num_vertices);
+  }
+}
+
+NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_uniform_sample)
+.describe(R"code(This operator samples sub-graph from a csr graph via an
+uniform probability. 
+Example::
+
+  shape = (5, 5)
+  data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64)
+  indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
+  indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
+  a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
+  a.asnumpy()
+  seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
+  out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
+
+  out[0]
+  [0 1 2 3 4 5]
+  <NDArray 6 @cpu(0)>
+
+  out[1].asnumpy()
+  array([[ 0,  1,  0,  3,  0],
+         [ 5,  0,  0,  7,  0],
+         [ 9,  0,  0, 11,  0],
+         [13,  0, 15,  0,  0],
+         [17,  0, 19,  0,  0]])
+
+  out[2]
+  [0 0 0 0 0]
+<NDArray 5 @cpu(0)>
+)code" ADD_FILELINE)
+.set_attr_parser(ParamParser<NeighborSampleParam>)
+.set_num_inputs([](const NodeAttrs& attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+  return params.num_args;
+})
+.set_num_outputs([](const NodeAttrs& attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+  size_t num_subgraphs = params.num_args - 1;
+  return num_subgraphs * 3;
+})
+.set_attr<FInferStorageType>("FInferStorageType", CSRNeighborUniformSampleStorageType)
+.set_attr<nnvm::FInferShape>("FInferShape", CSRNeighborUniformSampleShape)
+.set_attr<nnvm::FInferType>("FInferType", CSRNeighborUniformSampleType)
+.set_attr<FComputeEx>("FComputeEx<cpu>", CSRNeighborUniformSampleComputeExCPU)
+.add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix")
+.add_argument("seed_arrays", "NDArray-or-Symbol[]", "seed vertices")
+.set_attr<std::string>("key_var_num_args", "num_args")
+.add_arguments(NeighborSampleParam::__FIELDS__());
+
+/*
+ * Operator: contrib_csr_neighbor_non_uniform_sample
+ */
+static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs,
+                                              const OpContext& ctx,
+                                              const std::vector<NDArray>& inputs,
+                                              const std::vector<OpReqType>& req,
+                                              const std::vector<NDArray>& outputs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+
+  int num_subgraphs = inputs.size() - 2;
+  CHECK_EQ(outputs.size(), 4 * num_subgraphs);
+
+  const float* probability = inputs[1].data().dptr<float>();
+
+#pragma omp parallel for
+  for (int i = 0; i < num_subgraphs; i++) {
+    float* sub_prob = outputs[i+2*num_subgraphs].data().dptr<float>();
+    SampleSubgraph(inputs[0],                     // graph_csr
+                   inputs[i + 2],                 // seed vector
+                   outputs[i],                    // sample_id
+                   outputs[i + 1*num_subgraphs],  // sub_csr
+                   sub_prob,                      // sample_id_probability
+                   outputs[i + 3*num_subgraphs],  // sample_id_layer
+                   probability,
+                   params.num_hops,
+                   params.num_neighbor,
+                   params.max_num_vertices);
+  }
+}
+
+NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_non_uniform_sample)
+.describe(R"code(This operator samples sub-graph from a csr graph via an
+uniform probability. 
+Example::
+
+  shape = (5, 5)
+  prob = mx.nd.array([0.9, 0.8, 0.2, 0.4, 0.1], dtype=np.float32)
+  data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64)
+  indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
+  indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
+  a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
+  seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
+  out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=1, num_neighbor=2, max_num_vertices=5)
+
+  out[0]
+  [0 1 2 3 4 5]
+  <NDArray 6 @cpu(0)>
+
+  out[1].asnumpy()
+  array([[ 0,  1,  2,  0,  0],
+         [ 5,  0,  6,  0,  0],
+         [ 9, 10,  0,  0,  0],
+         [13, 14,  0,  0,  0],
+         [ 0, 18, 19,  0,  0]])
+
+  out[2]
+  [0.9 0.8 0.2 0.4 0.1]
+  <NDArray 5 @cpu(0)>
+
+  out[3]
+  [0 0 0 0 0]
+  <NDArray 5 @cpu(0)>
+)code" ADD_FILELINE)
+.set_attr_parser(ParamParser<NeighborSampleParam>)
+.set_num_inputs([](const NodeAttrs& attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+  return params.num_args;
+})
+.set_num_outputs([](const NodeAttrs& attrs) {
+  const NeighborSampleParam& params =
+    nnvm::get<NeighborSampleParam>(attrs.parsed);
+  size_t num_subgraphs = params.num_args - 2;
+  return num_subgraphs * 4;
+})
+.set_attr<FInferStorageType>("FInferStorageType", CSRNeighborNonUniformSampleStorageType)
+.set_attr<nnvm::FInferShape>("FInferShape", CSRNeighborNonUniformSampleShape)
+.set_attr<nnvm::FInferType>("FInferType", CSRNeighborNonUniformSampleType)
+.set_attr<FComputeEx>("FComputeEx<cpu>", CSRNeighborNonUniformSampleComputeExCPU)
+.add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix")
+.add_argument("probability", "NDArray-or-Symbol", "probability vector")
+.add_argument("seed_arrays", "NDArray-or-Symbol[]", "seed vertices")
+.set_attr<std::string>("key_var_num_args", "num_args")
+.add_arguments(NeighborSampleParam::__FIELDS__());
 
 ///////////////////////// Create induced subgraph ///////////////////////////
 
@@ -104,8 +973,6 @@ static bool DGLSubgraphType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-typedef int64_t dgl_id_t;
-
 class Bitmap {
   const size_t size = 1024 * 1024 * 4;
   const size_t mask = size - 1;
@@ -523,6 +1390,5 @@ Example::
 .set_attr<FComputeEx>("FComputeEx<cpu>", DGLAdjacencyForwardEx<cpu>)
 .add_argument("data", "NDArray-or-Symbol", "Input ndarray");
 
-
 }  // namespace op
 }  // namespace mxnet
diff --git a/tests/python/unittest/test_dgl_graph.py b/tests/python/unittest/test_dgl_graph.py
index 1d758b9..f996d7f 100644
--- a/tests/python/unittest/test_dgl_graph.py
+++ b/tests/python/unittest/test_dgl_graph.py
@@ -26,6 +26,128 @@ from numpy.testing import assert_allclose, assert_array_equal
 from mxnet.test_utils import *
 import unittest
 
+def check_uniform(out, num_hops, max_num_vertices):
+    sample_id = out[0]
+    sub_csr = out[1]
+    layer = out[2]
+    # check sample_id
+    assert (len(sample_id) == max_num_vertices+1)
+    count = 0
+    for data in sample_id:
+        if data != -1:
+            count = count + 1
+    assert (mx.nd.array([count-1], dtype=np.int64) == sample_id[-1])
+    # check sub_csr
+    sub_csr.check_format(full_check=True)
+    # check layer
+    for data in layer:
+        assert(data <= num_hops)
+
+def check_non_uniform(out, num_hops, max_num_vertices):
+    sample_id = out[0]
+    sub_csr = out[1]
+    prob = out[2]
+    layer = out[3]
+    # check sample_id
+    assert (len(sample_id) == max_num_vertices+1)
+    count = 0
+    for data in sample_id:
+        if data != -1:
+            count = count + 1
+    assert (mx.nd.array([count-1], dtype=np.int64) == sample_id[-1])
+    # check sub_csr
+    sub_csr.check_format(full_check=True)
+    # check prob
+    assert (len(prob) == max_num_vertices)
+    # check layer
+    for data in layer:
+        assert(data <= num_hops)
+
+def test_uniform_sample():
+    shape = (5, 5)
+    data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64)
+    indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
+    indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
+    a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
+
+    seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 3)
+    check_uniform(out, num_hops=1, max_num_vertices=5)
+
+    seed = mx.nd.array([0], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=1, max_num_vertices=4)
+    assert (len(out) == 3)
+    check_uniform(out, num_hops=1, max_num_vertices=4)
+
+    seed = mx.nd.array([0], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=1, max_num_vertices=4)
+    assert (len(out) == 3)
+    check_uniform(out, num_hops=2, max_num_vertices=4)
+
+    seed = mx.nd.array([0,2,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 3)
+    check_uniform(out, num_hops=1, max_num_vertices=5)
+
+    seed = mx.nd.array([0,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 3)
+    check_uniform(out, num_hops=1, max_num_vertices=5)
+
+    seed = mx.nd.array([0,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 3)
+    check_uniform(out, num_hops=2, max_num_vertices=5)
+
+    seed = mx.nd.array([0,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 3)
+    check_uniform(out, num_hops=1, max_num_vertices=5)
+
+def test_non_uniform_sample():
+    shape = (5, 5)
+    prob = mx.nd.array([0.9, 0.8, 0.2, 0.4, 0.1], dtype=np.float32)
+    data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64)
+    indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
+    indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
+    a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
+
+    seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 4)
+    check_non_uniform(out, num_hops=1, max_num_vertices=5)
+
+    seed = mx.nd.array([0], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=1, num_neighbor=1, max_num_vertices=4)
+    assert (len(out) == 4)
+    check_non_uniform(out, num_hops=1, max_num_vertices=4)
+
+    seed = mx.nd.array([0], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=2, num_neighbor=1, max_num_vertices=4)
+    assert (len(out) == 4)
+    check_non_uniform(out, num_hops=2, max_num_vertices=4)
+
+    seed = mx.nd.array([0,2,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 4)
+    check_non_uniform(out, num_hops=1, max_num_vertices=5)
+
+    seed = mx.nd.array([0,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 4)
+    check_non_uniform(out, num_hops=1, max_num_vertices=5)
+
+    seed = mx.nd.array([0,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=2, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 4)
+    check_non_uniform(out, num_hops=2, max_num_vertices=5)
+
+    seed = mx.nd.array([0,4], dtype=np.int64)
+    out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed, num_args=3, num_hops=1, num_neighbor=2, max_num_vertices=5)
+    assert (len(out) == 4)
+    check_non_uniform(out, num_hops=1, max_num_vertices=5)
+
 def test_edge_id():
     shape = rand_shape_2d()
     data = rand_ndarray(shape, stype='csr', density=0.4)