You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2016/05/30 23:18:47 UTC

[12/50] [abbrv] incubator-quickstep git commit: Support for NUMA aware preloading (#206)

Support for NUMA aware preloading (#206)

* Support for preloading of NUMA-partitioned relations.

- Stored relations that have been NUMA-partitioned can now be
  preloaded. The preloading can maintain the same NUMA placement of
  storage blocks.

* Added missing glog library in CMakeLists


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/0f261ea1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/0f261ea1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/0f261ea1

Branch: refs/heads/work-order-serialization
Commit: 0f261ea16a9b8cdb10ca7d5c2c64a906b71ae9a2
Parents: 456b434
Author: Harshad Deshmukh <d....@gmail.com>
Authored: Wed May 4 10:32:47 2016 -0500
Committer: Jignesh Patel <pa...@users.noreply.github.com>
Committed: Wed May 4 10:32:47 2016 -0500

----------------------------------------------------------------------
 storage/CMakeLists.txt      |  5 +++
 storage/PreloaderThread.cpp | 86 +++++++++++++++++++++++++++++++++++-----
 storage/PreloaderThread.hpp | 23 +++++++++++
 3 files changed, 105 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f261ea1/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index ed23802..dacacfa 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -741,6 +741,7 @@ target_link_libraries(quickstep_storage_PackedRowStoreValueAccessor
                       quickstep_utility_BitVector
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_storage_PreloaderThread
+                      glog
                       quickstep_catalog_CatalogDatabase
                       quickstep_catalog_CatalogRelation
                       quickstep_catalog_CatalogTypedefs
@@ -750,6 +751,10 @@ target_link_libraries(quickstep_storage_PreloaderThread
                       quickstep_threading_Thread
                       quickstep_threading_ThreadUtil
                       quickstep_utility_Macros)
+if (QUICKSTEP_HAVE_LIBNUMA)
+  target_link_libraries(quickstep_storage_PreloaderThread
+                        quickstep_catalog_NUMAPlacementScheme)
+endif()
 target_link_libraries(quickstep_storage_SMAIndexSubBlock
                       glog
                       quickstep_catalog_CatalogAttribute

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f261ea1/storage/PreloaderThread.cpp
----------------------------------------------------------------------
diff --git a/storage/PreloaderThread.cpp b/storage/PreloaderThread.cpp
index d5dc55b..8f600b8 100644
--- a/storage/PreloaderThread.cpp
+++ b/storage/PreloaderThread.cpp
@@ -17,8 +17,13 @@
 
 #include "storage/PreloaderThread.hpp"
 
+#include <cstddef>
 #include <vector>
 
+#ifdef QUICKSTEP_HAVE_LIBNUMA
+#include <unordered_map>
+#endif
+
 #include "catalog/CatalogDatabase.hpp"
 #include "catalog/CatalogRelation.hpp"
 #include "catalog/CatalogTypedefs.hpp"
@@ -27,6 +32,12 @@
 #include "storage/StorageManager.hpp"
 #include "threading/ThreadUtil.hpp"
 
+#ifdef QUICKSTEP_HAVE_LIBNUMA
+#include "catalog/NUMAPlacementScheme.hpp"
+#endif
+
+#include "glog/logging.h"
+
 namespace quickstep {
 
 void PreloaderThread::run() {
@@ -38,24 +49,81 @@ void PreloaderThread::run() {
   std::size_t blocks_loaded = 0;
 
   for (const CatalogRelation &relation : database_) {
-    std::vector<block_id> blocks = relation.getBlocksSnapshot();
-    for (block_id current_block_id : blocks) {
+    if (relation.hasPartitionScheme()) {
+      blocks_loaded += preloadNUMAAware(relation, blocks_loaded, num_slots);
+    } else {
+      std::vector<block_id> blocks = relation.getBlocksSnapshot();
+      for (block_id current_block_id : blocks) {
+        try {
+          BlockReference current_block = storage_manager_->getBlock(current_block_id, relation);
+        } catch (...) {
+          LOG(ERROR) << "Error after loading " << blocks_loaded << "blocks\n";
+          throw;
+        }
+        ++blocks_loaded;
+        if (blocks_loaded == num_slots) {
+          // The buffer pool has filled up. But, some database blocks are not loaded.
+          printf(" The database is larger than the buffer pool. Only %lu blocks were loaded ",
+                 blocks_loaded);
+          return;
+        }
+      }
+    }
+  }
+  printf(" Loaded %lu blocks ", blocks_loaded);
+}
+
+std::size_t PreloaderThread::preloadNUMAAware(
+    const CatalogRelation &relation,
+    const std::size_t num_previously_loaded_blocks,
+    const std::size_t num_slots) {
+#ifdef QUICKSTEP_HAVE_LIBNUMA
+  std::size_t blocks_loaded = 0;
+  const NUMAPlacementScheme *placement_scheme = relation.getNUMAPlacementSchemePtr();
+  DCHECK(placement_scheme != nullptr);
+  DCHECK(relation.hasPartitionScheme());
+  const PartitionScheme &part_scheme = relation.getPartitionScheme();
+  const PartitionSchemeHeader &part_scheme_header =
+      part_scheme.getPartitionSchemeHeader();
+  const std::size_t num_partitions = part_scheme_header.getNumPartitions();
+  // Key = NUMA node ID, value = number of blocks loaded from that NUMA node.
+  std::unordered_map<numa_node_id, std::size_t> num_blocks_loaded;
+  for (std::size_t part_id = 0; part_id < num_partitions; ++part_id) {
+    const numa_node_id partition_numa_node_id =
+        placement_scheme->getNUMANodeForPartition(part_id);
+    for (block_id curr_block_id : part_scheme.getBlocksInPartition(part_id)) {
       try {
-        BlockReference current_block = storage_manager_->getBlock(current_block_id, relation);
+        BlockReference current_block = storage_manager_->getBlock(
+            curr_block_id, relation, partition_numa_node_id);
       } catch (...) {
-        LOG(ERROR) << "Error after loading " << blocks_loaded << "blocks\n";
+        LOG(ERROR) << "Error after loading "
+                   << blocks_loaded + num_previously_loaded_blocks
+                   << " blocks\n";
         throw;
       }
       ++blocks_loaded;
-      if (blocks_loaded == num_slots) {
+      num_blocks_loaded[partition_numa_node_id]++;
+      if ((blocks_loaded + num_previously_loaded_blocks) == num_slots) {
         // The buffer pool has filled up. But, some database blocks are not loaded.
-        printf(" The database is larger than the buffer pool. Only %lu blocks were loaded ",
-               blocks_loaded);
-        return;
+        printf(
+            " The database is larger than the buffer pool. Only %lu blocks "
+            "were loaded ",
+            blocks_loaded + num_previously_loaded_blocks);
+        return blocks_loaded;
       }
     }
   }
-  printf(" Loaded %lu blocks ", blocks_loaded);
+  LOG(INFO) << "For relation: " << relation.getName();
+  for (auto numa_block_loaded_info : num_blocks_loaded) {
+    LOG(INFO) << "NUMA node: " << numa_block_loaded_info.first
+              << " Number of loaded blocks: " << numa_block_loaded_info.second;
+  }
+  return blocks_loaded;
+#else
+  LOG(INFO) << "Relation: " << relation.getName()
+            << " has partition scheme but the system doesn't support NUMA";
+  return 0;
+#endif
 }
 
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/0f261ea1/storage/PreloaderThread.hpp
----------------------------------------------------------------------
diff --git a/storage/PreloaderThread.hpp b/storage/PreloaderThread.hpp
index ed866b4..f16fd50 100644
--- a/storage/PreloaderThread.hpp
+++ b/storage/PreloaderThread.hpp
@@ -18,12 +18,15 @@
 #ifndef QUICKSTEP_STORAGE_PRELOADER_THREAD_HPP_
 #define QUICKSTEP_STORAGE_PRELOADER_THREAD_HPP_
 
+#include <cstddef>
+
 #include "threading/Thread.hpp"
 #include "utility/Macros.hpp"
 
 namespace quickstep {
 
 class CatalogDatabase;
+class CatalogRelation;
 class StorageManager;
 
 /** \addtogroup Storage
@@ -65,6 +68,26 @@ class PreloaderThread : public Thread {
   void run() override;
 
  private:
+  /**
+   * @brief Preload a relation which has a partition and a NUMA placement scheme.
+   *
+   * @param relation The relation to be preloaded.
+   * @param num_previously_loaded_blocks The number of blocks already preloaded.
+   * @param num_slots The maximum number of slots in the StorageManager.
+   *
+   * @warning This function may not detect skew on NUMA sockets, i.e. if a given
+   *          NUMA socket has a large number of blocks, preloading may cause
+   *          the memory on that NUMA socket to be full. It is recommended to
+   *          use this preloading when we are sure that each NUMA socket has
+   *          been allocated a sufficient amount of memory so as not to exceed
+   *          that socket's memory limit.
+   *
+   * @return The number of blocks loaded during this function call.
+   **/
+  std::size_t preloadNUMAAware(const CatalogRelation &relation,
+                               const std::size_t num_previously_loaded_blocks,
+                               const std::size_t num_slots);
+
   const CatalogDatabase &database_;
   StorageManager *storage_manager_;