You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2016/05/30 22:47:32 UTC

[24/32] incubator-quickstep git commit: Visualize optimized physical plan in DOT format (#232)

Visualize optimized physical plan in DOT format (#232)

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/1605fd84
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/1605fd84
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/1605fd84

Branch: refs/heads/master
Commit: 1605fd8445a59a7cc38bf23088d9af434a2b75eb
Parents: d136e1d
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Fri May 20 16:47:31 2016 -0500
Committer: Zuyu Zhang <zz...@pivotal.io>
Committed: Mon May 30 15:47:52 2016 -0700

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt        |   3 +-
 query_optimizer/PhysicalGenerator.cpp |  13 ++-
 utility/CMakeLists.txt                |  13 +++
 utility/PlanVisualizer.cpp            | 161 +++++++++++++++++++++++++++++
 utility/PlanVisualizer.hpp            |  94 +++++++++++++++++
 utility/StringUtil.hpp                |  30 +++++-
 6 files changed, 310 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1605fd84/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index aa2873e..5c9438d 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -194,7 +194,8 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_strategy_Selection
                       quickstep_queryoptimizer_strategy_Strategy
                       quickstep_queryoptimizer_Validator
-                      quickstep_utility_Macros)
+                      quickstep_utility_Macros
+                      quickstep_utility_PlanVisualizer)
 target_link_libraries(quickstep_queryoptimizer_QueryHandle
                       quickstep_catalog_Catalog_proto
                       quickstep_queryexecution_QueryContext_proto

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1605fd84/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 662236f..75a7bc9 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -33,6 +33,7 @@
 #include "query_optimizer/strategy/OneToOne.hpp"
 #include "query_optimizer/strategy/Selection.hpp"
 #include "query_optimizer/strategy/Strategy.hpp"
+#include "utility/PlanVisualizer.hpp"
 
 #include "gflags/gflags.h"
 
@@ -45,7 +46,12 @@ DEFINE_bool(reorder_hash_joins, true,
             "If true, apply hash join order optimization to each group of hash "
             "joins. The optimization applies a greedy algorithm to favor smaller "
             "cardinality and selective tables to be joined first, which is suitable "
-            "for queries on star-schema tables");
+            "for queries on star-schema tables.");
+
+DEFINE_bool(visualize_plan, false,
+            "If true, visualize the final physical plan into a graph in DOT format "
+            "(DOT is a plain text graph description language). Then print the "
+            "generated graph through stderr.");
 
 namespace L = ::quickstep::optimizer::logical;
 namespace P = ::quickstep::optimizer::physical;
@@ -101,6 +107,11 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
 
   DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
 
+  if (FLAGS_visualize_plan) {
+  quickstep::PlanVisualizer plan_visualizer;
+    std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
+  }
+
 #ifdef QUICKSTEP_DEBUG
   Validate(physical_plan_);
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1605fd84/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 6d1eeab..2d3db8f 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -171,6 +171,7 @@ add_library(quickstep_utility_Glob Glob.cpp Glob.hpp)
 add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp)
 add_library(quickstep_utility_Macros ../empty_src.cpp Macros.hpp)
 add_library(quickstep_utility_MemStream ../empty_src.cpp MemStream.hpp)
+add_library(quickstep_utility_PlanVisualizer PlanVisualizer.cpp PlanVisualizer.hpp)
 add_library(quickstep_utility_PrimeNumber PrimeNumber.cpp PrimeNumber.hpp)
 add_library(quickstep_utility_PtrList ../empty_src.cpp PtrList.hpp)
 add_library(quickstep_utility_PtrMap ../empty_src.cpp PtrMap.hpp)
@@ -231,6 +232,17 @@ target_link_libraries(quickstep_utility_MemStream
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_utility_PrimeNumber
                       glog)
+target_link_libraries(quickstep_utility_PlanVisualizer
+                      quickstep_catalog_CatalogRelation
+                      quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_physical_HashJoin
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_queryoptimizer_physical_TableReference
+                      quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_utility_Macros
+                      quickstep_utility_StringUtil)
 target_link_libraries(quickstep_utility_PtrList
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_utility_PtrMap
@@ -295,6 +307,7 @@ target_link_libraries(quickstep_utility
                       quickstep_utility_HashPair
                       quickstep_utility_Macros
                       quickstep_utility_MemStream
+                      quickstep_utility_PlanVisualizer
                       quickstep_utility_PrimeNumber
                       quickstep_utility_PtrList
                       quickstep_utility_PtrMap

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1605fd84/utility/PlanVisualizer.cpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp
new file mode 100644
index 0000000..962d577
--- /dev/null
+++ b/utility/PlanVisualizer.cpp
@@ -0,0 +1,161 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin\u2014Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#include "utility/PlanVisualizer.hpp"
+
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "catalog/CatalogRelation.hpp"
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/TableReference.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "utility/StringUtil.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+namespace C = ::quickstep::optimizer::cost;
+
+std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+  cost_model_.reset(
+      new C::StarSchemaSimpleCostModel(
+          std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans()));
+
+  color_map_["TableReference"] = "skyblue";
+  color_map_["Selection"] = "#90EE90";
+  color_map_["HashJoin"] = "red";
+  color_map_["HashLeftOuterJoin"] = "orange";
+  color_map_["HashLeftSemiJoin"] = "orange";
+  color_map_["HashLeftAntiJoin"] = "orange";
+
+  visit(input);
+
+  // Format output graph
+  std::ostringstream graph_oss;
+  graph_oss << "digraph g {\n";
+  graph_oss << "  rankdir=BT\n";
+  graph_oss << "  node [penwidth=2]\n";
+  graph_oss << "  edge [fontsize=16 fontcolor=gray penwidth=2]\n\n";
+
+  // Format nodes
+  for (const NodeInfo &node_info : nodes_) {
+    graph_oss << "  " << node_info.id << " [";
+    if (!node_info.labels.empty()) {
+      graph_oss << "label=\""
+                << EscapeSpecialChars(JoinToString(node_info.labels, "&#10;"))
+                << "\"";
+    }
+    if (!node_info.color.empty()) {
+      graph_oss << " style=filled fillcolor=\"" << node_info.color << "\"";
+    }
+    graph_oss << "]\n";
+  }
+  graph_oss << "\n";
+
+  // Format edges
+  for (const EdgeInfo &edge_info : edges_) {
+    graph_oss << "  " << edge_info.src_node_id << " -> "
+              << edge_info.dst_node_id << " [";
+    if (!edge_info.labels.empty()) {
+      graph_oss << "label=\""
+                << EscapeSpecialChars(JoinToString(edge_info.labels, "&#10;"))
+                << "\"";
+    }
+    graph_oss << "]\n";
+  }
+
+  graph_oss << "}\n";
+
+  return graph_oss.str();
+}
+
+void PlanVisualizer::visit(const P::PhysicalPtr &input) {
+  int node_id = ++id_counter_;
+  node_id_map_.emplace(input, node_id);
+
+  for (const auto &child : input->children()) {
+    visit(child);
+
+    int child_id = node_id_map_[child];
+
+    edges_.emplace_back(EdgeInfo());
+    EdgeInfo &edge_info = edges_.back();
+    edge_info.src_node_id = child_id;
+    edge_info.dst_node_id = node_id;
+
+    // Print output attributes except for TableReference -- there are just too many
+    // attributes out of TableReference.
+    if (child->getPhysicalType() != P::PhysicalType::kTableReference) {
+      for (const auto &attr : child->getOutputAttributes()) {
+        edge_info.labels.emplace_back(attr->attribute_alias());
+      }
+    }
+  }
+
+  nodes_.emplace_back(NodeInfo());
+  NodeInfo &node_info = nodes_.back();
+  node_info.id = node_id;
+  if (color_map_.find(input->getName()) != color_map_.end()) {
+    node_info.color = color_map_[input->getName()];
+  }
+
+  switch (input->getPhysicalType()) {
+    case P::PhysicalType::kTableReference: {
+      const P::TableReferencePtr table_reference =
+        std::static_pointer_cast<const P::TableReference>(input);
+      node_info.labels.emplace_back(table_reference->relation()->getName());
+      break;
+    }
+    case P::PhysicalType::kHashJoin: {
+      const P::HashJoinPtr hash_join =
+        std::static_pointer_cast<const P::HashJoin>(input);
+      node_info.labels.emplace_back(input->getName());
+
+      const auto &left_attributes = hash_join->left_join_attributes();
+      const auto &right_attributes = hash_join->right_join_attributes();
+      for (std::size_t i = 0; i < left_attributes.size(); ++i) {
+        node_info.labels.emplace_back(
+            left_attributes[i]->attribute_alias() + " = " + right_attributes[i]->attribute_alias());
+      }
+      break;
+    }
+    default: {
+      node_info.labels.emplace_back(input->getName());
+      break;
+    }
+  }
+  node_info.labels.emplace_back(
+      "est. # = " + std::to_string(cost_model_->estimateCardinality(input)));
+  node_info.labels.emplace_back(
+      "est. Selectivity = " + std::to_string(cost_model_->estimateSelectivity(input)));
+}
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1605fd84/utility/PlanVisualizer.hpp
----------------------------------------------------------------------
diff --git a/utility/PlanVisualizer.hpp b/utility/PlanVisualizer.hpp
new file mode 100644
index 0000000..080b7de
--- /dev/null
+++ b/utility/PlanVisualizer.hpp
@@ -0,0 +1,94 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin\u2014Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_PLAN_VISUALIZER_HPP_
+#define QUICKSTEP_UTILITY_PLAN_VISUALIZER_HPP_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief A query plan visualizer that converts a physical plan into a graph in
+ *        DOT format. Note that DOT is a plain text graph description language.
+ *
+ * @note This utility tool can be further extended to be more generic.
+ */
+class PlanVisualizer {
+ public:
+  PlanVisualizer()
+      : id_counter_(0) {}
+
+  ~PlanVisualizer() {}
+
+  /**
+   * @brief Visualize the query plan into a graph in DOT format (DOT is a plain
+   *        text graph description language).
+   *
+   * @return The visualized query plan graph in DOT format.
+   */
+  std::string visualize(const optimizer::physical::PhysicalPtr &input);
+
+ private:
+  /**
+   * @brief Information of a graph node.
+   */
+  struct NodeInfo {
+    int id;
+    std::vector<std::string> labels;
+    std::string color;
+  };
+
+  /**
+   * @brief Information of a graph edge.
+   */
+  struct EdgeInfo {
+    int src_node_id;
+    int dst_node_id;
+    std::vector<std::string> labels;
+  };
+
+  void visit(const optimizer::physical::PhysicalPtr &input);
+
+  int id_counter_;
+  std::unordered_map<optimizer::physical::PhysicalPtr, int> node_id_map_;
+  std::unordered_map<std::string, std::string> color_map_;
+
+  std::vector<NodeInfo> nodes_;
+  std::vector<EdgeInfo> edges_;
+
+  std::unique_ptr<optimizer::cost::StarSchemaSimpleCostModel> cost_model_;
+
+  DISALLOW_COPY_AND_ASSIGN(PlanVisualizer);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif /* QUICKSTEP_UTILITY_PLAN_VISUALIZER_HPP_ */

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1605fd84/utility/StringUtil.hpp
----------------------------------------------------------------------
diff --git a/utility/StringUtil.hpp b/utility/StringUtil.hpp
index bd138bb..6477ded 100644
--- a/utility/StringUtil.hpp
+++ b/utility/StringUtil.hpp
@@ -1,6 +1,8 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
  *   Copyright 2015 Pivotal Software, Inc.
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin\u2014Madison.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -19,6 +21,7 @@
 #define QUICKSTEP_UTILITY_STRING_UTIL_HPP_
 
 #include <cstdint>
+#include <sstream>
 #include <string>
 #include <vector>
 
@@ -34,7 +37,7 @@ namespace quickstep {
  * @param str The string to be converted.
  * @return The converted string with all lower case characters bing converted to upper case characters.
  */
-extern std::string ToLower(const std::string& str);
+extern std::string ToLower(const std::string &str);
 
 /**
  * @brief Converts special characters to escape characters.
@@ -42,7 +45,30 @@ extern std::string ToLower(const std::string& str);
  * @param text The string to be unescaped.
  * @return Unescaped string.
  */
-extern std::string EscapeSpecialChars(const std::string& text);
+extern std::string EscapeSpecialChars(const std::string &text);
+
+/**
+ * @brief Join all objects in a iterable container into a single string. The object
+ *        must have implemented the operator<< overloading with std::stringstream.
+ *
+ * @param container The iterable container of objects.
+ * @param separator A string to separate each object.
+ */
+template <typename ContainerType>
+std::string JoinToString(const ContainerType &container,
+                         const std::string &separator) {
+  std::ostringstream oss;
+  bool is_first = true;
+  for (const auto &item : container) {
+    if (is_first) {
+      is_first = false;
+    } else {
+      oss << separator;
+    }
+    oss << item;
+  }
+  return oss.str();
+}
 
 /**
  * @brief Parse a string of base-10 integers separated by delimiter characters