You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ha...@apache.org on 2018/12/19 08:52:06 UTC

[incubator-mxnet] branch master updated: Accelerate DGL csr neighbor sampling (#13588)

This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new defa614  Accelerate DGL csr neighbor sampling (#13588)
defa614 is described below

commit defa614d777efbffda69e39d3c998069e8bcfa3a
Author: BullDemonKing <45...@users.noreply.github.com>
AuthorDate: Wed Dec 19 16:51:35 2018 +0800

    Accelerate DGL csr neighbor sampling (#13588)
    
    * Speedup and fix bug in dgl_csr_sampling op
    
    * Update dgl_graph.cc
    
    * simplify functions.
    
    * avoid adding nodes in the last level in the queue.
    
    * remove a hashtable lookup in neigh_pos.
    
    * reduce a hashtable lookup in sub_ver_mp.
    
    * merge copying vids and layers.
    
    * reduce hashtable lookup when writing to output csr.
    
    * fix a bug.
    
    * limit the number of sampled vertices.
    
    * fix lint.
    
    * fix a compile error.
    
    * fix compile error.
    
    * fix compile.
    
    * remove one hashtable lookup per vertex and hashtable iteration.
    
    * remove queue.
    
    * use vector for neigh_pos.
    
    * fix lint
    
    * avoid init output arrays.
    
    * fix tests.
    
    * fix tests.
    
    * update docs.
    
    * retrigger
    
    * retrigger
---
 src/operator/contrib/dgl_graph.cc       | 317 ++++++++++++++++----------------
 tests/python/unittest/test_dgl_graph.py |  22 +--
 2 files changed, 170 insertions(+), 169 deletions(-)

diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc
index 6d58675..a03cbef 100644
--- a/src/operator/contrib/dgl_graph.cc
+++ b/src/operator/contrib/dgl_graph.cc
@@ -413,21 +413,6 @@ static bool CSRNeighborNonUniformSampleType(const nnvm::NodeAttrs& attrs,
   return success;
 }
 
-/*
- * Get src vertex and edge id for a destination vertex
- */
-static void GetSrcList(const dgl_id_t* val_list,
-                       const dgl_id_t* col_list,
-                       const dgl_id_t* indptr,
-                       const dgl_id_t dst_id,
-                       std::vector<dgl_id_t>* src_list,
-                       std::vector<dgl_id_t>* edge_list) {
-  for (dgl_id_t i = *(indptr+dst_id); i < *(indptr+dst_id+1); ++i) {
-    src_list->push_back(col_list[i]);
-    edge_list->push_back(val_list[i]);
-  }
-}
-
 static void RandomSample(size_t set_size,
                          size_t num,
                          std::vector<size_t>* out,
@@ -464,34 +449,34 @@ static void NegateSet(const std::vector<size_t> &idxs,
 /*
  * Uniform sample
  */
-static void GetUniformSample(const std::vector<dgl_id_t>& ver_list,
-                             const std::vector<dgl_id_t>& edge_list,
+static void GetUniformSample(const dgl_id_t* val_list,
+                             const dgl_id_t* col_list,
+                             const size_t ver_len,
                              const size_t max_num_neighbor,
                              std::vector<dgl_id_t>* out_ver,
                              std::vector<dgl_id_t>* out_edge,
                              unsigned int* seed) {
-  CHECK_EQ(ver_list.size(), edge_list.size());
   // Copy ver_list to output
-  if (ver_list.size() <= max_num_neighbor) {
-    for (size_t i = 0; i < ver_list.size(); ++i) {
-      out_ver->push_back(ver_list[i]);
-      out_edge->push_back(edge_list[i]);
+  if (ver_len <= max_num_neighbor) {
+    for (size_t i = 0; i < ver_len; ++i) {
+      out_ver->push_back(col_list[i]);
+      out_edge->push_back(val_list[i]);
     }
     return;
   }
   // If we just sample a small number of elements from a large neighbor list.
   std::vector<size_t> sorted_idxs;
-  if (ver_list.size() > max_num_neighbor * 2) {
+  if (ver_len > max_num_neighbor * 2) {
     sorted_idxs.reserve(max_num_neighbor);
-    RandomSample(ver_list.size(), max_num_neighbor, &sorted_idxs, seed);
+    RandomSample(ver_len, max_num_neighbor, &sorted_idxs, seed);
     std::sort(sorted_idxs.begin(), sorted_idxs.end());
   } else {
     std::vector<size_t> negate;
-    negate.reserve(ver_list.size() - max_num_neighbor);
-    RandomSample(ver_list.size(), ver_list.size() - max_num_neighbor,
+    negate.reserve(ver_len - max_num_neighbor);
+    RandomSample(ver_len, ver_len - max_num_neighbor,
                  &negate, seed);
     std::sort(negate.begin(), negate.end());
-    NegateSet(negate, ver_list.size(), &sorted_idxs);
+    NegateSet(negate, ver_len, &sorted_idxs);
   }
   // verify the result.
   CHECK_EQ(sorted_idxs.size(), max_num_neighbor);
@@ -499,8 +484,8 @@ static void GetUniformSample(const std::vector<dgl_id_t>& ver_list,
     CHECK_GT(sorted_idxs[i], sorted_idxs[i - 1]);
   }
   for (auto idx : sorted_idxs) {
-    out_ver->push_back(ver_list[idx]);
-    out_edge->push_back(edge_list[idx]);
+    out_ver->push_back(col_list[idx]);
+    out_edge->push_back(val_list[idx]);
   }
 }
 
@@ -508,26 +493,26 @@ static void GetUniformSample(const std::vector<dgl_id_t>& ver_list,
  * Non-uniform sample via ArrayHeap
  */
 static void GetNonUniformSample(const float* probability,
-                                const std::vector<dgl_id_t>& ver_list,
-                                const std::vector<dgl_id_t>& edge_list,
+                                const dgl_id_t* val_list,
+                                const dgl_id_t* col_list,
+                                const size_t ver_len,
                                 const size_t max_num_neighbor,
                                 std::vector<dgl_id_t>* out_ver,
                                 std::vector<dgl_id_t>* out_edge,
                                 unsigned int* seed) {
-  CHECK_EQ(ver_list.size(), edge_list.size());
   // Copy ver_list to output
-  if (ver_list.size() <= max_num_neighbor) {
-    for (size_t i = 0; i < ver_list.size(); ++i) {
-      out_ver->push_back(ver_list[i]);
-      out_edge->push_back(edge_list[i]);
+  if (ver_len <= max_num_neighbor) {
+    for (size_t i = 0; i < ver_len; ++i) {
+      out_ver->push_back(col_list[i]);
+      out_edge->push_back(val_list[i]);
     }
     return;
   }
   // Make sample
   std::vector<size_t> sp_index(max_num_neighbor);
-  std::vector<float> sp_prob(ver_list.size());
-  for (size_t i = 0; i < ver_list.size(); ++i) {
-    sp_prob[i] = probability[ver_list[i]];
+  std::vector<float> sp_prob(ver_len);
+  for (size_t i = 0; i < ver_len; ++i) {
+    sp_prob[i] = probability[col_list[i]];
   }
   ArrayHeap arrayHeap(sp_prob);
   arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index, seed);
@@ -535,22 +520,14 @@ static void GetNonUniformSample(const float* probability,
   out_edge->resize(max_num_neighbor);
   for (size_t i = 0; i < max_num_neighbor; ++i) {
     size_t idx = sp_index[i];
-    out_ver->at(i) = ver_list[idx];
-    out_edge->at(i) = edge_list[idx];
+    out_ver->at(i) = col_list[idx];
+    out_edge->at(i) = val_list[idx];
   }
   sort(out_ver->begin(), out_ver->end());
   sort(out_edge->begin(), out_edge->end());
 }
 
 /*
- * This is used for BFS traversal
- */
-struct ver_node {
-  dgl_id_t vertex_id;
-  int level;
-};
-
-/*
  * Used for subgraph sampling
  */
 struct neigh_list {
@@ -571,9 +548,9 @@ static void SampleSubgraph(const NDArray &csr,
                            float* sub_prob,
                            const NDArray &sub_layer,
                            const float* probability,
-                           dgl_id_t num_hops,
-                           dgl_id_t num_neighbor,
-                           dgl_id_t max_num_vertices) {
+                           int num_hops,
+                           size_t num_neighbor,
+                           size_t max_num_vertices) {
   unsigned int time_seed = time(nullptr);
   size_t num_seeds = seed_arr.shape().Size();
   CHECK_GE(max_num_vertices, num_seeds);
@@ -586,123 +563,119 @@ static void SampleSubgraph(const NDArray &csr,
   dgl_id_t* out_layer = sub_layer.data().dptr<dgl_id_t>();
 
   // BFS traverse the graph and sample vertices
-  dgl_id_t sub_vertices_count = 0;
   // <vertex_id, layer_id>
-  std::unordered_map<dgl_id_t, int> sub_ver_mp;
-  std::queue<ver_node> node_queue;
+  std::unordered_set<dgl_id_t> sub_ver_mp;
+  std::vector<std::pair<dgl_id_t, dgl_id_t> > sub_vers;
+  sub_vers.reserve(num_seeds * 10);
   // add seed vertices
   for (size_t i = 0; i < num_seeds; ++i) {
-    ver_node node;
-    node.vertex_id = seed[i];
-    node.level = 0;
-    node_queue.push(node);
+    auto ret = sub_ver_mp.insert(seed[i]);
+    // If the vertex is inserted successfully.
+    if (ret.second) {
+      sub_vers.emplace_back(seed[i], 0);
+    }
   }
-  std::vector<dgl_id_t> tmp_src_list;
-  std::vector<dgl_id_t> tmp_edge_list;
   std::vector<dgl_id_t> tmp_sampled_src_list;
   std::vector<dgl_id_t> tmp_sampled_edge_list;
-  std::unordered_map<dgl_id_t, neigh_list> neigh_mp;
+  // ver_id, position
+  std::vector<std::pair<dgl_id_t, size_t> > neigh_pos;
+  neigh_pos.reserve(num_seeds);
+  std::vector<dgl_id_t> neighbor_list;
   size_t num_edges = 0;
-  while (!node_queue.empty() &&
-    sub_vertices_count <= max_num_vertices ) {
-    ver_node& cur_node = node_queue.front();
-    dgl_id_t dst_id = cur_node.vertex_id;
-    if (cur_node.level < num_hops) {
-      auto ret = sub_ver_mp.find(dst_id);
-      if (ret != sub_ver_mp.end()) {
-        node_queue.pop();
-        continue;
-      }
-      tmp_src_list.clear();
-      tmp_edge_list.clear();
-      tmp_sampled_src_list.clear();
-      tmp_sampled_edge_list.clear();
-      GetSrcList(val_list,
-                 col_list,
-                 indptr,
-                 dst_id,
-                 &tmp_src_list,
-                 &tmp_edge_list);
-      if (probability == nullptr) {  // uniform-sample
-        GetUniformSample(tmp_src_list,
-                       tmp_edge_list,
+
+  // sub_vers is used both as a node collection and a queue.
+  // In the while loop, we iterate over sub_vers and new nodes are added to the vector.
+  // A vertex in the vector only needs to be accessed once. If there is a vertex behind idx
+  // isn't in the last level, we will sample its neighbors. If not, the while loop terminates.
+  size_t idx = 0;
+  while (idx < sub_vers.size() &&
+    sub_ver_mp.size() < max_num_vertices) {
+    dgl_id_t dst_id = sub_vers[idx].first;
+    int cur_node_level = sub_vers[idx].second;
+    idx++;
+    // If the node is in the last level, we don't need to sample neighbors
+    // from this node.
+    if (cur_node_level >= num_hops)
+      continue;
+
+    tmp_sampled_src_list.clear();
+    tmp_sampled_edge_list.clear();
+    dgl_id_t ver_len = *(indptr+dst_id+1) - *(indptr+dst_id);
+    if (probability == nullptr) {  // uniform-sample
+      GetUniformSample(val_list + *(indptr + dst_id),
+                       col_list + *(indptr + dst_id),
+                       ver_len,
                        num_neighbor,
                        &tmp_sampled_src_list,
                        &tmp_sampled_edge_list,
                        &time_seed);
-      } else {  // non-uniform-sample
-        GetNonUniformSample(probability,
-                       tmp_src_list,
-                       tmp_edge_list,
+    } else {  // non-uniform-sample
+      GetNonUniformSample(probability,
+                       val_list + *(indptr + dst_id),
+                       col_list + *(indptr + dst_id),
+                       ver_len,
                        num_neighbor,
                        &tmp_sampled_src_list,
                        &tmp_sampled_edge_list,
                        &time_seed);
-      }
-      neigh_mp.insert(std::pair<dgl_id_t, neigh_list>(dst_id,
-        neigh_list(tmp_sampled_src_list,
-                   tmp_sampled_edge_list)));
-      num_edges += tmp_sampled_src_list.size();
-      sub_ver_mp[cur_node.vertex_id] = cur_node.level;
-      for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) {
-        auto ret = sub_ver_mp.find(tmp_sampled_src_list[i]);
-        if (ret == sub_ver_mp.end()) {
-          ver_node new_node;
-          new_node.vertex_id = tmp_sampled_src_list[i];
-          new_node.level = cur_node.level + 1;
-          node_queue.push(new_node);
-        }
-      }
-    } else {  // vertex without any neighbor
-      auto ret = sub_ver_mp.find(dst_id);
-      if (ret != sub_ver_mp.end()) {
-        node_queue.pop();
-        continue;
-      }
-      tmp_sampled_src_list.clear();
-      tmp_sampled_edge_list.clear();
-      neigh_mp.insert(std::pair<dgl_id_t, neigh_list>(dst_id,
-        neigh_list(tmp_sampled_src_list,      // empty vector
-                   tmp_sampled_edge_list)));  // empty vector
-      sub_ver_mp[cur_node.vertex_id] = cur_node.level;
     }
-    sub_vertices_count++;
-    node_queue.pop();
+    CHECK_EQ(tmp_sampled_src_list.size(), tmp_sampled_edge_list.size());
+    size_t pos = neighbor_list.size();
+    neigh_pos.emplace_back(dst_id, pos);
+    // First we push the size of neighbor vector
+    neighbor_list.push_back(tmp_sampled_edge_list.size());
+    // Then push the vertices
+    for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) {
+      neighbor_list.push_back(tmp_sampled_src_list[i]);
+    }
+    // Finally we push the edge list
+    for (size_t i = 0; i < tmp_sampled_edge_list.size(); ++i) {
+      neighbor_list.push_back(tmp_sampled_edge_list[i]);
+    }
+    num_edges += tmp_sampled_src_list.size();
+    for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) {
+      // If we have sampled the max number of vertices, we have to stop.
+      if (sub_ver_mp.size() >= max_num_vertices)
+        break;
+      // We need to add the neighbor in the hashtable here. This ensures that
+      // the vertex in the queue is unique. If we see a vertex before, we don't
+      // need to add it to the queue again.
+      auto ret = sub_ver_mp.insert(tmp_sampled_src_list[i]);
+      // If the sampled neighbor is inserted to the map successfully.
+      if (ret.second)
+        sub_vers.emplace_back(tmp_sampled_src_list[i], cur_node_level + 1);
+    }
+  }
+  // Let's check if there is a vertex that we haven't sampled its neighbors.
+  for (; idx < sub_vers.size(); idx++) {
+    if (sub_vers[idx].second < num_hops) {
+      LOG(WARNING)
+        << "The sampling is truncated because we have reached the max number of vertices\n"
+        << "Please use a smaller number of seeds or a small neighborhood";
+      break;
+    }
   }
 
   // Copy sub_ver_mp to output[0]
-  size_t idx = 0;
-  for (auto& data : sub_ver_mp) {
-    *(out+idx) = data.first;
-    idx++;
-  }
+  // Copy layer
   size_t num_vertices = sub_ver_mp.size();
-  std::sort(out, out + num_vertices);
-  // The rest data will be set to -1
-  for (dgl_id_t i = idx; i < max_num_vertices; ++i) {
-    *(out+i) = -1;
+  std::sort(sub_vers.begin(), sub_vers.end(),
+            [](const std::pair<dgl_id_t, dgl_id_t> &a1, const std::pair<dgl_id_t, dgl_id_t> &a2) {
+    return a1.first < a2.first;
+  });
+  for (size_t i = 0; i < sub_vers.size(); i++) {
+    out[i] = sub_vers[i].first;
+    out_layer[i] = sub_vers[i].second;
   }
   // The last element stores the actual
   // number of vertices in the subgraph.
   out[max_num_vertices] = sub_ver_mp.size();
+
   // Copy sub_probability
   if (sub_prob != nullptr) {
-    for (dgl_id_t i = 0; i < max_num_vertices; ++i) {
+    for (size_t i = 0; i < sub_ver_mp.size(); ++i) {
       dgl_id_t idx = out[i];
-      if (idx != -1) {
-        sub_prob[i] = probability[idx];
-      } else {
-        sub_prob[i] = -1;
-      }
-    }
-  }
-  // Copy layer
-  for (dgl_id_t i = 0; i < max_num_vertices; ++i) {
-    dgl_id_t idx = out[i];
-    if (idx != -1) {
-      out_layer[i] = sub_ver_mp[idx];
-    } else {
-      out_layer[i] = -1;
+      sub_prob[i] = probability[idx];
     }
   }
   // Construct sub_csr_graph
@@ -718,20 +691,37 @@ static void SampleSubgraph(const NDArray &csr,
   dgl_id_t* indptr_out = sub_csr.aux_data(0).dptr<dgl_id_t>();
   indptr_out[0] = 0;
   size_t collected_nedges = 0;
+
+  // Both the out array and neigh_pos are sorted. By scanning the two arrays, we can see
+  // which vertices have neighbors and which don't.
+  std::sort(neigh_pos.begin(), neigh_pos.end(),
+            [](const std::pair<dgl_id_t, size_t> &a1, const std::pair<dgl_id_t, size_t> &a2) {
+    return a1.first < a2.first;
+  });
+  size_t idx_with_neigh = 0;
   for (size_t i = 0; i < num_vertices; i++) {
     dgl_id_t dst_id = *(out + i);
-    auto it = neigh_mp.find(dst_id);
-    const auto &edges = it->second.edges;
-    const auto &neighs = it->second.neighs;
-    CHECK_EQ(edges.size(), neighs.size());
-    if (!edges.empty()) {
-      std::copy(edges.begin(), edges.end(), val_list_out + collected_nedges);
-      std::copy(neighs.begin(), neighs.end(), col_list_out + collected_nedges);
-      collected_nedges += edges.size();
+    // If a vertex is in sub_ver_mp but not in neigh_pos, this vertex must not
+    // have edges.
+    size_t edge_size = 0;
+    if (idx_with_neigh < neigh_pos.size() && dst_id == neigh_pos[idx_with_neigh].first) {
+      size_t pos = neigh_pos[idx_with_neigh].second;
+      CHECK_LT(pos, neighbor_list.size());
+      edge_size = neighbor_list[pos];
+      CHECK_LE(pos + edge_size * 2 + 1, neighbor_list.size());
+
+      std::copy_n(neighbor_list.begin() + pos + 1,
+                  edge_size,
+                  col_list_out + collected_nedges);
+      std::copy_n(neighbor_list.begin() + pos + edge_size + 1,
+                  edge_size,
+                  val_list_out + collected_nedges);
+      collected_nedges += edge_size;
+      idx_with_neigh++;
     }
-    indptr_out[i+1] = indptr_out[i] + edges.size();
+    indptr_out[i+1] = indptr_out[i] + edge_size;
   }
-  for (dgl_id_t i = num_vertices+1; i <= max_num_vertices; ++i) {
+  for (size_t i = num_vertices+1; i <= max_num_vertices; ++i) {
     indptr_out[i] = indptr_out[i-1];
   }
 }
@@ -766,8 +756,16 @@ static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs,
 }
 
 NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_uniform_sample)
-.describe(R"code(This operator samples sub-graph from a csr graph via an
-uniform probability. 
+.describe(R"code(This operator samples sub-graphs from a csr graph via an
+uniform probability. The operator is designed for DGL.
+
+The operator outputs three sets of NDArrays to represent the sampled results
+(the number of NDArrays in each set is the same as the number of seed NDArrays):
+1) a set of 1D NDArrays containing the sampled vertices, 2) a set of CSRNDArrays representing
+the sampled edges, 3) a set of 1D NDArrays indicating the layer where a vertex is sampled.
+The first set of 1D NDArrays have a length of max_num_vertices+1. The last element in an NDArray
+indicate the acutal number of vertices in a subgraph. The third set of NDArrays have a length
+of max_num_vertices, and the valid number of vertices is the same as the ones in the first set.
 
 Example:
 
@@ -853,7 +851,16 @@ static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs
 
 NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_non_uniform_sample)
 .describe(R"code(This operator samples sub-graph from a csr graph via an
-uniform probability. 
+non-uniform probability. The operator is designed for DGL.
+
+The operator outputs four sets of NDArrays to represent the sampled results
+(the number of NDArrays in each set is the same as the number of seed NDArrays):
+1) a set of 1D NDArrays containing the sampled vertices, 2) a set of CSRNDArrays representing
+the sampled edges, 3) a set of 1D NDArrays with the probability that vertices are sampled,
+4) a set of 1D NDArrays indicating the layer where a vertex is sampled.
+The first set of 1D NDArrays have a length of max_num_vertices+1. The last element in an NDArray
+indicate the acutal number of vertices in a subgraph. The third and fourth set of NDArrays have a length
+of max_num_vertices, and the valid number of vertices is the same as the ones in the first set.
 
 Example:
 
diff --git a/tests/python/unittest/test_dgl_graph.py b/tests/python/unittest/test_dgl_graph.py
index 069fef6..e24cf4d 100644
--- a/tests/python/unittest/test_dgl_graph.py
+++ b/tests/python/unittest/test_dgl_graph.py
@@ -32,15 +32,12 @@ def check_uniform(out, num_hops, max_num_vertices):
     layer = out[2]
     # check sample_id
     assert (len(sample_id) == max_num_vertices+1)
-    count = 0
-    for data in sample_id:
-        if data != -1:
-            count = count + 1
-    assert (mx.nd.array([count-1], dtype=np.int64) == sample_id[-1])
+    num_vertices = sample_id[-1].asnumpy()[0]
     # check sub_csr
     sub_csr.check_format(full_check=True)
+    assert np.all((sub_csr.indptr[num_vertices:] == sub_csr.indptr[num_vertices]).asnumpy())
     # check layer
-    for data in layer:
+    for data in layer[:num_vertices]:
         assert(data <= num_hops)
 
 def check_non_uniform(out, num_hops, max_num_vertices):
@@ -50,17 +47,14 @@ def check_non_uniform(out, num_hops, max_num_vertices):
     layer = out[3]
     # check sample_id
     assert (len(sample_id) == max_num_vertices+1)
-    count = 0
-    for data in sample_id:
-        if data != -1:
-            count = count + 1
-    assert (mx.nd.array([count-1], dtype=np.int64) == sample_id[-1])
+    num_vertices = sample_id[-1].asnumpy()[0]
     # check sub_csr
     sub_csr.check_format(full_check=True)
+    assert np.all((sub_csr.indptr[num_vertices:] == sub_csr.indptr[num_vertices]).asnumpy())
     # check prob
     assert (len(prob) == max_num_vertices)
     # check layer
-    for data in layer:
+    for data in layer[:num_vertices]:
         assert(data <= num_hops)
 
 def check_compact(csr, id_arr, num_nodes):
@@ -101,9 +95,9 @@ def test_uniform_sample():
     check_compact(out[1], out[0], num_nodes)
 
     seed = mx.nd.array([0], dtype=np.int64)
-    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=1, max_num_vertices=4)
+    out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=1, max_num_vertices=3)
     assert (len(out) == 3)
-    check_uniform(out, num_hops=2, max_num_vertices=4)
+    check_uniform(out, num_hops=2, max_num_vertices=3)
     num_nodes = out[0][-1].asnumpy()
     assert num_nodes > 0
     assert num_nodes < len(out[0])