You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by hb...@apache.org on 2016/07/16 22:17:07 UTC

[6/8] incubator-quickstep git commit: Add memory estimate for a query in handle.

Add memory estimate for a query in handle.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/5f58d927
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/5f58d927
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/5f58d927

Branch: refs/heads/memory-estimate
Commit: 5f58d927ef18e5700c28e5965bd1f9cdd6cca773
Parents: eea5603
Author: Harshad Deshmukh <hb...@apache.org>
Authored: Mon Jul 11 16:32:28 2016 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Sat Jul 16 16:31:21 2016 -0500

----------------------------------------------------------------------
 query_optimizer/ExecutionGenerator.cpp          |  7 +++++
 query_optimizer/QueryHandle.hpp                 | 10 +++++++
 storage/HashTableFactory.hpp                    | 31 ++++++++++++++++++++
 storage/LinearOpenAddressingHashTable.hpp       | 12 ++++++++
 storage/SeparateChainingHashTable.hpp           | 13 ++++++++
 .../SimpleScalarSeparateChainingHashTable.hpp   |  8 +++++
 6 files changed, 81 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5f58d927/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 45f5f78..33708f0 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -741,6 +741,13 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
   }
 
   hash_table_proto->set_estimated_num_entries(build_cardinality);
+  // Add the memory estimate to the query handle.
+  // TODO(harshad) - Convert the build_cardinality to bytes.
+  const std::size_t hash_table_estimated_memory_in_bytes =
+      JoinHashTableFactory::GetEstimatedMemoryInBytes(
+          build_cardinality,
+          HashTableImplTypeProtoFromString(FLAGS_join_hashtable_type));
+  query_handle_->addMemoryToEstimate(hash_table_estimated_memory_in_bytes);
 
   // Create three operators.
   const QueryPlan::DAGNodeIndex build_operator_index =

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5f58d927/query_optimizer/QueryHandle.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/QueryHandle.hpp b/query_optimizer/QueryHandle.hpp
index 04f672e..a07313f 100644
--- a/query_optimizer/QueryHandle.hpp
+++ b/query_optimizer/QueryHandle.hpp
@@ -50,6 +50,7 @@ class QueryHandle {
                        const std::uint64_t query_priority = 1)
       : query_id_(query_id),
         query_priority_(query_priority),
+        estimated_max_memory_in_bytes_(0),
         query_plan_(new QueryPlan()),
         query_result_relation_(nullptr) {}
 
@@ -153,9 +154,18 @@ class QueryHandle {
     return std::chrono::duration<double, std::milli>(admission_time_ - entry_time_).count();
   }
 
+  std::size_t getEstimatedMaxMemoryInBytes() const {
+    return estimated_max_memory_in_bytes_;
+  }
+
+  void addMemoryToEstimate(const std::size_t memory_bytes) {
+    estimated_max_memory_in_bytes_ += memory_bytes;
+  }
+
  private:
   const std::size_t query_id_;
   const std::uint64_t query_priority_;
+  std::size_t estimated_max_memory_in_bytes_;
 
   std::unique_ptr<QueryPlan> query_plan_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5f58d927/storage/HashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp
index 34baaeb..47e3a24 100644
--- a/storage/HashTableFactory.hpp
+++ b/storage/HashTableFactory.hpp
@@ -135,6 +135,37 @@ template <typename ValueT,
           bool allow_duplicate_keys>
 class HashTableFactory {
  public:
+  static std::size_t GetEstimatedMemoryInBytes(
+      const std::size_t num_estimated_entries,
+      const serialization::HashTableImplType hash_table_type) {
+    switch (hash_table_type) {
+      case serialization::HashTableImplType::LINEAR_OPEN_ADDRESSING:
+        return LinearOpenAddressingHashTable<ValueT,
+                                             resizable,
+                                             serializable,
+                                             force_key_copy,
+                                             allow_duplicate_keys>::
+            GetEstimatedMemoryInBytes(num_estimated_entries);
+      case serialization::HashTableImplType::SEPARATE_CHAINING:
+        return SeparateChainingHashTable<ValueT,
+                                         resizable,
+                                         serializable,
+                                         force_key_copy,
+                                         allow_duplicate_keys>::
+            GetEstimatedMemoryInBytes(num_estimated_entries);
+      case serialization::HashTableImplType::SIMPLE_SCALAR_SEPARATE_CHAINING:
+        return SimpleScalarSeparateChainingHashTable<ValueT,
+                                                     resizable,
+                                                     serializable,
+                                                     force_key_copy,
+                                                     allow_duplicate_keys>::
+            GetEstimatedMemoryInBytes(num_estimated_entries);
+      default: {
+        LOG(FATAL) << "Unrecognized HashTableImplType\n";
+      }
+    }
+  }
+
   /**
    * @brief Create a new resizable HashTable, with the type selected by
    *        hash_table_type. Other parameters are forwarded to the HashTable's

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5f58d927/storage/LinearOpenAddressingHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/LinearOpenAddressingHashTable.hpp b/storage/LinearOpenAddressingHashTable.hpp
index 438a2d1..9f841cd 100644
--- a/storage/LinearOpenAddressingHashTable.hpp
+++ b/storage/LinearOpenAddressingHashTable.hpp
@@ -61,6 +61,18 @@ class LinearOpenAddressingHashTable : public HashTable<ValueT,
                                                        force_key_copy,
                                                        allow_duplicate_keys> {
  public:
+  static std::size_t GetEstimatedMemoryInBytes(const std::size_t num_entries) {
+    const std::size_t num_slots_tmp = get_next_prime_number(num_entries * kHashTableLoadFactor);
+    const std::size_t bucket_size = LinearOpenAddressingHashTable::ComputeBucketSize(sizeof(std::size_t) * 2);
+    // TODO(harshad) - Refine the bucket_size below with the help of KeyManager.
+    // NOTE(harshad) - We ignore the estimated variable key size term below
+    // assuming that we don't have variable length keys in SSB queries.
+    const std::size_t required_memory = LinearOpenAddressingHashTable::ComputeBucketSize(sizeof(std::size_t) * 2)
+                                  + (num_slots_tmp / kHashTableLoadFactor)
+                                      * (bucket_size /*+ *key_manager_.getEstimatedVariableKeySize()*/);
+    return required_memory;
+  }
+
   // Bring in constants from HashTable.
   static constexpr unsigned char kEmptyHashByte
       = HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_keys>::kEmptyHashByte;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5f58d927/storage/SeparateChainingHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/SeparateChainingHashTable.hpp b/storage/SeparateChainingHashTable.hpp
index c096b1b..e3dbff3 100644
--- a/storage/SeparateChainingHashTable.hpp
+++ b/storage/SeparateChainingHashTable.hpp
@@ -61,6 +61,19 @@ class SeparateChainingHashTable : public HashTable<ValueT,
                                                    force_key_copy,
                                                    allow_duplicate_keys> {
  public:
+  static std::size_t GetEstimatedMemoryInBytes(const std::size_t num_entries) {
+    const std::size_t num_slots_tmp = get_next_prime_number(num_entries * kHashTableLoadFactor);
+    // TODO(harshad) - Refine the bucket_size below with the help of KeyManager.
+    const std::size_t bucket_size = SeparateChainingHashTable::ComputeBucketSize(sizeof(std::size_t) * 2);
+    // NOTE(harshad) - We ignore the estimated variable key size term below
+    // assuming that we don't have variable length keys in SSB queries.
+    const std::size_t required_memory =
+        sizeof(Header) + num_slots_tmp * sizeof(std::atomic<std::size_t>) +
+        (num_slots_tmp / kHashTableLoadFactor) *
+            (bucket_size /*+ *key_manager_.getEstimatedVariableKeySize()*/);
+    return required_memory;
+  }
+
   SeparateChainingHashTable(const std::vector<const Type*> &key_types,
                             const std::size_t num_entries,
                             StorageManager *storage_manager);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5f58d927/storage/SimpleScalarSeparateChainingHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/SimpleScalarSeparateChainingHashTable.hpp b/storage/SimpleScalarSeparateChainingHashTable.hpp
index 962a66c..c6e5b4e 100644
--- a/storage/SimpleScalarSeparateChainingHashTable.hpp
+++ b/storage/SimpleScalarSeparateChainingHashTable.hpp
@@ -74,6 +74,14 @@ class SimpleScalarSeparateChainingHashTable : public HashTable<ValueT,
                                                                force_key_copy,
                                                                allow_duplicate_keys> {
  public:
+  static std::size_t GetEstimatedMemoryInBytes(const std::size_t num_entries) {
+    const std::size_t num_slots_tmp = get_next_prime_number(num_entries * kHashTableLoadFactor);
+    const std::size_t required_memory = sizeof(Header)
+                                  + num_slots_tmp * sizeof(std::atomic<std::size_t>)
+                                  + (num_slots_tmp / kHashTableLoadFactor) * sizeof(Bucket);
+    return required_memory;
+  }
+
   SimpleScalarSeparateChainingHashTable(const std::vector<const Type*> &key_types,
                                         const std::size_t num_entries,
                                         StorageManager *storage_manager);