You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/02/23 08:22:15 UTC

[3/4] incubator-quickstep git commit: Use BitVector as LIPFilter implementation when applicable

Use BitVector as LIPFilter implementation when applicable


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/2b2d7ba1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/2b2d7ba1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/2b2d7ba1

Branch: refs/heads/hdfs_text_scan
Commit: 2b2d7ba1970ade47b1170cd7974cb2fc53f7ba71
Parents: 1572762
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Wed Feb 22 14:06:55 2017 -0600
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Wed Feb 22 14:06:55 2017 -0600

----------------------------------------------------------------------
 query_optimizer/rules/AttachLIPFilters.cpp | 74 +++++++++++++++++++++++--
 query_optimizer/rules/AttachLIPFilters.hpp |  9 +++
 query_optimizer/rules/CMakeLists.txt       |  1 +
 3 files changed, 79 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2b2d7ba1/query_optimizer/rules/AttachLIPFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.cpp b/query_optimizer/rules/AttachLIPFilters.cpp
index 4b6ac59..9a13b48 100644
--- a/query_optimizer/rules/AttachLIPFilters.cpp
+++ b/query_optimizer/rules/AttachLIPFilters.cpp
@@ -20,6 +20,7 @@
 #include "query_optimizer/rules/AttachLIPFilters.hpp"
 
 #include <algorithm>
+#include <cstdint>
 #include <map>
 #include <set>
 #include <unordered_set>
@@ -37,6 +38,7 @@
 #include "query_optimizer/physical/PhysicalType.hpp"
 #include "query_optimizer/physical/Selection.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "types/TypeID.hpp"
 #include "types/TypedValue.hpp"
 #include "utility/lip_filter/LIPFilter.hpp"
 
@@ -126,11 +128,40 @@ void AttachLIPFilters::attachLIPFilters(
         const E::ExprId source_attr_id = pair.second->source_attribute->id();
         if (already_filtered_attributes->find(source_attr_id)
                 == already_filtered_attributes->end()) {
-          lip_filter_configuration_->addBuildInfo(
-              P::SingleIdentityHashFilterBuildInfo::Create(
-                  pair.second->source_attribute,
-                  std::max(64uL, pair.second->estimated_cardinality * 8u)),
-              pair.second->source);
+          bool use_exact_filter = false;
+          std::int64_t min_cpp_value;
+          std::int64_t max_cpp_value;
+          const bool has_exact_min_max_stats =
+              findExactMinMaxValuesForAttributeHelper(pair.second->source,
+                                                      pair.second->source_attribute,
+                                                      &min_cpp_value,
+                                                      &max_cpp_value);
+          if (has_exact_min_max_stats) {
+            const std::int64_t value_range = max_cpp_value - min_cpp_value;
+            DCHECK_GE(value_range, 0);
+            // TODO(jianqiao): Add this threshold as a gflag (together with
+            // InjectJoinFilters::kMaxFilterSize).
+            if (value_range <= 1000000000L) {
+              use_exact_filter = true;
+            }
+          }
+
+          if (use_exact_filter) {
+            lip_filter_configuration_->addBuildInfo(
+                P::BitVectorExactFilterBuildInfo::Create(
+                    pair.second->source_attribute,
+                    min_cpp_value,
+                    max_cpp_value,
+                    false),
+                pair.second->source);
+          } else {
+            lip_filter_configuration_->addBuildInfo(
+                P::SingleIdentityHashFilterBuildInfo::Create(
+                    pair.second->source_attribute,
+                    std::max(64uL, pair.second->estimated_cardinality * 8u)),
+                pair.second->source);
+          }
+
           lip_filter_configuration_->addProbeInfo(
               P::LIPFilterProbeInfo::Create(
                   pair.first,
@@ -258,5 +289,38 @@ const std::vector<AttachLIPFilters::LIPFilterInfoPtr>& AttachLIPFilters
   return probe_side_info_.at(node);
 }
 
+bool AttachLIPFilters::findExactMinMaxValuesForAttributeHelper(  // Fetches exact min/max stats for an Int/Long attribute.
+    const physical::PhysicalPtr &physical_plan,  // plan node handed to the cost model's stat lookups
+    const expressions::AttributeReferencePtr &attribute,  // attribute whose min/max stats are requested
+    std::int64_t *min_cpp_value,  // out: minimum value; written only when true is returned
+    std::int64_t *max_cpp_value) const {  // out: maximum value; written only when true is returned
+  bool min_value_is_exact;  // passed to findMinValueStat() as its exactness out-flag
+  bool max_value_is_exact;  // passed to findMaxValueStat() as its exactness out-flag
+
+  const TypedValue min_value =
+      cost_model_->findMinValueStat(physical_plan, attribute, &min_value_is_exact);
+  const TypedValue max_value =
+      cost_model_->findMaxValueStat(physical_plan, attribute, &max_value_is_exact);
+  if (min_value.isNull() || max_value.isNull() ||  // give up when either stat is NULL ...
+      (!min_value_is_exact) || (!max_value_is_exact)) {  // ... or is not flagged as exact
+    return false;
+  }
+
+  switch (attribute->getValueType().getTypeID()) {  // only kInt and kLong are converted
+    case TypeID::kInt: {
+      *min_cpp_value = min_value.getLiteral<int>();  // int literal, widened to std::int64_t on assignment
+      *max_cpp_value = max_value.getLiteral<int>();
+      return true;
+    }
+    case TypeID::kLong: {
+      *min_cpp_value = min_value.getLiteral<std::int64_t>();  // already 64-bit, stored as-is
+      *max_cpp_value = max_value.getLiteral<std::int64_t>();
+      return true;
+    }
+    default:  // any other type: report failure and leave both outputs unwritten
+      return false;
+  }
+}
+
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2b2d7ba1/query_optimizer/rules/AttachLIPFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachLIPFilters.hpp b/query_optimizer/rules/AttachLIPFilters.hpp
index b8cfc39..36cb010 100644
--- a/query_optimizer/rules/AttachLIPFilters.hpp
+++ b/query_optimizer/rules/AttachLIPFilters.hpp
@@ -21,6 +21,7 @@
 #define QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_LIP_FILTERS_HPP_
 
 #include <cstddef>
+#include <cstdint>
 #include <map>
 #include <memory>
 #include <set>
@@ -135,6 +136,14 @@ class AttachLIPFilters : public Rule<physical::Physical> {
 
   const std::vector<LIPFilterInfoPtr>& getProbeSideInfo(const NodeList &path);
 
+  // TODO(jianqiao): refactor this method as it is a duplication of
+  // InjectJoinFilters::findExactMinMaxValuesForAttributeHelper().
+  bool findExactMinMaxValuesForAttributeHelper(
+      const physical::PhysicalPtr &physical_plan,
+      const expressions::AttributeReferencePtr &attribute,
+      std::int64_t *min_cpp_value,
+      std::int64_t *max_cpp_value) const;
+
   std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
   std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> build_side_info_;
   std::map<physical::PhysicalPtr, std::vector<LIPFilterInfoPtr>> probe_side_info_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2b2d7ba1/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 427500d..6847951 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -59,6 +59,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_AttachLIPFilters
                       quickstep_queryoptimizer_physical_Selection
                       quickstep_queryoptimizer_physical_TopLevelPlan
                       quickstep_queryoptimizer_rules_Rule
+                      quickstep_types_TypeID
                       quickstep_types_TypedValue
                       quickstep_utility_Macros
                       quickstep_utility_lipfilter_LIPFilter)