You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ha...@apache.org on 2017/04/19 19:05:21 UTC

incubator-quickstep git commit: Topological sort functionality in DAG

Repository: incubator-quickstep
Updated Branches:
  refs/heads/master 563abe043 -> 6e3499a80


Topological sort functionality in DAG

- Implemented a very simple Kahn's algorithm for topological sorting of nodes
  in the DAG class.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/6e3499a8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/6e3499a8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/6e3499a8

Branch: refs/heads/master
Commit: 6e3499a80c559cb3ab11b9800cf5813b0f233f77
Parents: 563abe0
Author: Harshad Deshmukh <hb...@apache.org>
Authored: Tue Apr 18 15:34:16 2017 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Wed Apr 19 11:06:38 2017 -0500

----------------------------------------------------------------------
 utility/DAG.hpp                | 51 +++++++++++++++++++++++
 utility/tests/DAG_unittest.cpp | 82 +++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6e3499a8/utility/DAG.hpp
----------------------------------------------------------------------
diff --git a/utility/DAG.hpp b/utility/DAG.hpp
index a1f2619..c286880 100644
--- a/utility/DAG.hpp
+++ b/utility/DAG.hpp
@@ -22,6 +22,7 @@
 
 #include <cstddef>
 #include <memory>
+#include <queue>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
@@ -246,6 +247,11 @@ class DAG {
     return nodes_[node_index].getDependents().end();
   }
 
+  /**
+   * @brief Get a topologically sorted list of node IDs.
+   **/
+  std::vector<size_type_nodes> getTopologicalSorting() const;
+
  private:
   /**
    * @brief A node in the DAG which contains a payload. DAGNode owns its
@@ -489,6 +495,51 @@ bool DAG<T, LinkMetadataT>::hasCycleHelper(const typename DAG<T, LinkMetadataT>:
   return false;
 }
 
+template <class T, class LinkMetadataT>
+std::vector<typename DAG<T, LinkMetadataT>::size_type_nodes>
+DAG<T, LinkMetadataT>::getTopologicalSorting() const {
+  // As a clarification, if A->B then A is the dependency for B and B is dependent on A.
+  // We implement "Kahn's algorithm" for the sorting.
+  DCHECK(!hasCycle());
+  // This list is going to be the topologically sorted output.
+  std::unique_ptr<std::vector<typename DAG<T, LinkMetadataT>::size_type_nodes>>
+      sorted_list(new std::vector<size_type_nodes>());
+  sorted_list->reserve(this->size());
+  // Key = node ID, value = # incoming edges for this node.
+  // NOTE(harshad) - We modify the "values" in this map as we go along.
+  std::unordered_map<typename DAG<T, LinkMetadataT>::size_type_nodes,
+                     std::size_t> num_dependencies;
+  std::queue<typename DAG<T, LinkMetadataT>::size_type_nodes> nodes_with_no_dependencies;
+  // First store the nodes without any dependencies in a list.
+  // Also remember the number of dependencies for each node in a map.
+  for (auto node_id = 0u; node_id < this->size(); ++node_id) {
+    if (nodes_[node_id].getDependencies().empty()) {
+      nodes_with_no_dependencies.emplace(node_id);
+    }
+    num_dependencies[node_id] = nodes_[node_id].getDependencies().size();
+  }
+  // The algorithm begins now.
+  while (!nodes_with_no_dependencies.empty()) {
+    // For a node with no dependencies ...
+    auto curr_node = nodes_with_no_dependencies.front();
+    nodes_with_no_dependencies.pop();
+    // Add the node to the sorted list.
+    sorted_list->emplace_back(curr_node);
+    auto dependents_of_curr_node = getDependents(curr_node);
+    for (auto dependent_iterator : dependents_of_curr_node) {
+      // For each dependent of the current node ...
+      auto dependent_node_id = dependent_iterator.first;
+      // Remove the incoming edge from curr_node.
+      DCHECK_GE(num_dependencies[dependent_node_id], 1u);
+      if (--num_dependencies[dependent_node_id] == 0) {
+        // Now this node has no children.
+        nodes_with_no_dependencies.emplace(dependent_node_id);
+      }
+    }
+  }
+  return *(sorted_list.release());
+}
+
 /** @} */
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/6e3499a8/utility/tests/DAG_unittest.cpp
----------------------------------------------------------------------
diff --git a/utility/tests/DAG_unittest.cpp b/utility/tests/DAG_unittest.cpp
index 3fe2990..3e8d167 100644
--- a/utility/tests/DAG_unittest.cpp
+++ b/utility/tests/DAG_unittest.cpp
@@ -490,6 +490,88 @@ TEST(DAGTest, LinkMetadataBoolTest) {
   EXPECT_FALSE(dag_.getLinkMetadata(1, 0));
 }
 
+TEST(DAGTest, TopoSortTest) {
+  const int kNodeSize = 5;
+  DAG<DummyPayload, int> dag_;
+
+  for (int node_index = 0; node_index < kNodeSize; ++node_index) {
+    ASSERT_EQ(static_cast<std::size_t>(node_index),
+              dag_.createNode(new DummyPayload(node_index)));
+  }
+
+  /*
+   *    0
+   *   / \
+   *  v   v
+   *  1   2
+   *   \ /
+   *    v
+   *    3
+   *    |
+   *    v
+   *    4
+   *
+   */
+
+  dag_.createLink(0, 1, 2);
+  dag_.createLink(0, 2, 1);
+  dag_.createLink(1, 3, 1);
+  dag_.createLink(2, 3, 1);
+  dag_.createLink(3, 4, 1);
+
+  vector<DAG<DummyPayload, int>::size_type_nodes> sorted_list =
+      dag_.getTopologicalSorting();
+  std::unordered_map<DAG<DummyPayload, int>::size_type_nodes, bool> node_exists;
+  // First check if the ordering is a legal sequence of nodes, i.e. every node
+  // appears exactly once.
+  for (auto node_id = 0u; node_id < dag_.size(); ++node_id) {
+    node_exists[node_id] = false;
+  }
+  for (auto i : sorted_list) {
+    node_exists[i] = true;
+  }
+  for (auto node_id = 0u; node_id < dag_.size(); ++node_id) {
+    ASSERT_TRUE(node_exists[node_id]);
+  }
+  // We apply the following condition for verifying if we have obtained a valid
+  // topological sorting.
+  // If there is an edge i->j between two nodes i and j, then i comes before j
+  // in the topologically sorted order.
+  // We use a map to store the position of a given node in the sorted order.
+  // Key = node ID, value = position of the node in the sorted order.
+  std::unordered_map<std::size_t, std::size_t> position_in_sorted_order;
+  for (std::size_t i = 0; i < sorted_list.size(); ++i) {
+    position_in_sorted_order[sorted_list[i]] = i;
+  }
+  std::vector<std::tuple<std::size_t, std::size_t>> edges;
+  // Populate the list of edges.
+  edges.emplace_back(0, 1);
+  edges.emplace_back(0, 2);
+  edges.emplace_back(1, 3);
+  edges.emplace_back(2, 3);
+  edges.emplace_back(3, 4);
+  for (auto curr_edge : edges) {
+    // (i, j) : i is "from node", j is "to node".
+    std::size_t from_node_position = position_in_sorted_order[std::get<0>(curr_edge)];
+    std::size_t to_node_position = position_in_sorted_order[std::get<1>(curr_edge)];
+    ASSERT_LT(from_node_position, to_node_position);
+  }
+  // Now extend the same logic that we applied for edges for paths in the DAG.
+  // We have already verified paths with length = 1 (edges), so we will only
+  // consider paths with length more than one.
+  std::vector<std::tuple<std::size_t, std::size_t>> paths;
+  paths.emplace_back(0, 3);
+  paths.emplace_back(0, 4);
+  paths.emplace_back(1, 4);
+  paths.emplace_back(2, 4);
+  for (auto curr_path : paths) {
+    // (i, j) : i is "from node", j is "to node".
+    std::size_t from_node_position = position_in_sorted_order[std::get<0>(curr_path)];
+    std::size_t to_node_position = position_in_sorted_order[std::get<1>(curr_path)];
+    ASSERT_LT(from_node_position, to_node_position);
+  }
+}
+
 #ifdef QUICKSTEP_DEBUG
 TEST(DAGDeathTest, SixNodeStagesCycleTest) {
   const int kNodeSize = 6;