You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by hb...@apache.org on 2016/07/01 04:41:36 UTC

[06/18] incubator-quickstep git commit: Created a class for storing probabilities.

Created a class for storing probabilities.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/73917bf8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/73917bf8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/73917bf8

Branch: refs/heads/scheduler++
Commit: 73917bf874d71f4ab04e6ed293dc42afdf4e05c9
Parents: 13383a1
Author: Harshad Deshmukh <hb...@apache.org>
Authored: Tue Jun 21 11:45:46 2016 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Thu Jun 30 23:41:05 2016 -0500

----------------------------------------------------------------------
 query_execution/CMakeLists.txt                  |  13 ++
 query_execution/PolicyEnforcer.cpp              |   1 +
 query_execution/ProbabilityStore.hpp            | 223 +++++++++++++++++++
 .../tests/ProbabilityStore_unittest.cpp         |  75 +++++++
 4 files changed, 312 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/73917bf8/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index fcd4f48..18ae0da 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -36,6 +36,7 @@ add_library(quickstep_queryexecution_ExecutionStats ../empty_src.cpp ExecutionSt
 add_library(quickstep_queryexecution_Foreman Foreman.cpp Foreman.hpp)
 add_library(quickstep_queryexecution_ForemanLite ../empty_src.cpp ForemanLite.hpp)
 add_library(quickstep_queryexecution_PolicyEnforcer PolicyEnforcer.cpp PolicyEnforcer.hpp)
+add_library(quickstep_queryexecution_ProbabilityStore ../empty_src.cpp ProbabilityStore.hpp)
 add_library(quickstep_queryexecution_QueryContext QueryContext.cpp QueryContext.hpp)
 add_library(quickstep_queryexecution_QueryContext_proto
             ${queryexecution_QueryContext_proto_srcs}
@@ -97,6 +98,7 @@ target_link_libraries(quickstep_queryexecution_PolicyEnforcer
                       glog
                       quickstep_queryexecution_ExecutionStats
                       quickstep_catalog_CatalogTypedefs
+                      quickstep_queryexecution_ProbabilityStore
                       quickstep_queryexecution_QueryExecutionMessages_proto
                       quickstep_queryexecution_QueryExecutionTypedefs
                       quickstep_queryexecution_QueryManager
@@ -106,6 +108,9 @@ target_link_libraries(quickstep_queryexecution_PolicyEnforcer
                       quickstep_relationaloperators_WorkOrder
                       quickstep_utility_Macros
                       tmb)
+target_link_libraries(quickstep_queryexecution_ProbabilityStore
+                      glog
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryexecution_QueryContext
                       glog
                       quickstep_catalog_CatalogDatabaseLite
@@ -252,6 +257,14 @@ if (ENABLE_DISTRIBUTED)
   add_test(BlockLocator_unittest BlockLocator_unittest)
 endif()
 
+add_executable(ProbabilityStore_unittest
+  "${CMAKE_CURRENT_SOURCE_DIR}/tests/ProbabilityStore_unittest.cpp")
+target_link_libraries(ProbabilityStore_unittest
+                      gtest
+                      gtest_main
+                      quickstep_queryexecution_ProbabilityStore)
+add_test(ProbabilityStore_unittest ProbabilityStore_unittest) 
+
 add_executable(QueryManager_unittest
   "${CMAKE_CURRENT_SOURCE_DIR}/tests/QueryManager_unittest.cpp")
 target_link_libraries(QueryManager_unittest

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/73917bf8/query_execution/PolicyEnforcer.cpp
----------------------------------------------------------------------
diff --git a/query_execution/PolicyEnforcer.cpp b/query_execution/PolicyEnforcer.cpp
index 84aa86a..db7206b 100644
--- a/query_execution/PolicyEnforcer.cpp
+++ b/query_execution/PolicyEnforcer.cpp
@@ -25,6 +25,7 @@
 #include <vector>
 
 #include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/ProbabilityStore.hpp"
 #include "query_execution/QueryExecutionMessages.pb.h"
 #include "query_execution/QueryManager.hpp"
 #include "query_execution/WorkerDirectory.hpp"

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/73917bf8/query_execution/ProbabilityStore.hpp
----------------------------------------------------------------------
diff --git a/query_execution/ProbabilityStore.hpp b/query_execution/ProbabilityStore.hpp
new file mode 100644
index 0000000..8343d24
--- /dev/null
+++ b/query_execution/ProbabilityStore.hpp
@@ -0,0 +1,223 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin\u2014Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_EXECUTION_PROBABILITY_STORE_HPP_
+#define QUICKSTEP_QUERY_EXECUTION_PROBABILITY_STORE_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <random>
+#include <unordered_map>
+#include <vector>
+
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/**
+ * @brief A class that stores the probabilities of objects. We use a field
+ *        called "property" to identify each object.
+ **/
+class ProbabilityStore {
+ public:
+  /**
+   * @brief Constructor.
+   **/
+  ProbabilityStore()
+      : mt_(std::random_device()()) {}
+
+  /**
+   * @brief Get the number of objects in the store.
+   **/
+  const std::size_t getNumObjects() const {
+    DCHECK_EQ(individual_probabilities_.size(), cumulative_probabilities_.size());
+    return individual_probabilities_.size();
+  }
+
+  /**
+   * @brief Add individual (not cumulative) probability for a given object.
+   *
+   * @note This function leaves the cumulative probabilities in a consistent
+   *       state. An alternative lazy implementation should be written if cost
+   *       of calculating cumulative probabilities is high.
+   * @note This function may override previously written probability values.
+   *
+   * @param property The property of the given object.
+   * @param individual_probability The individual (not cumulative) probability
+   *        of the given object.
+   **/
+  void addProbability(const std::size_t property,
+                      const float individual_probability) {
+    individual_probabilities_[property] = individual_probability;
+    updateCumulativeProbabilities();
+  }
+
+  /**
+   * @brief Add individual (not cumulative) probabilities for given objects.
+   *
+   * @note This function leaves the cumulative probabilities in a consistent
+   *       state. An alternative lazy implementation should be written if cost
+   *       of calculating cumulative probabilities is high.
+   * @note This function may override previously written probability values.
+   *
+   * @param properties A vector of properties to be added.
+   * @param individual_probabilities The individual (not cumulative)
+   *        probabilities of the given objects.
+   **/
+  void addProbabilities(const std::vector<std::size_t> &properties,
+                        const std::vector<float> &individual_probabilities) {
+    DCHECK_EQ(properties.size(), individual_probabilities.size());
+    for (std::size_t i = 0; i < properties.size(); ++i) {
+      individual_probabilities_[properties[i]] = individual_probabilities[i];
+    }
+    updateCumulativeProbabilities();
+  }
+
+  /**
+   * @brief Update  the probability of a given object to a new value.
+   *
+   * @param property The property of the object.
+   * @param new_individual_probability The new probability to be set.
+   **/
+  void updateProbability(const std::size_t property,
+                         const float new_individual_probability) {
+    auto it = individual_probabilities_.find(property);
+    DCHECK(it != individual_probabilities_.end());
+    it->second = new_individual_probability;
+    updateCumulativeProbabilities();
+  }
+
+  /**
+   * @brief Remove an object from the store.
+   *
+   * @param property The property of the object to be removed.
+   **/
+  void removeObject(const std::size_t property) {
+    auto it = individual_probabilities_.find(property);
+    DCHECK(it != individual_probabilities_.end());
+    individual_probabilities_.erase(it);
+    updateCumulativeProbabilities();
+  }
+
+  /**
+   * @brief Get the individual probability (not cumulative) for an object.
+   *
+   * @param property The property of the object.
+   **/
+  const float getIndividualProbability(const std::size_t property) const {
+    const auto it = individual_probabilities_.find(property);
+    DCHECK(it != individual_probabilities_.end());
+    return it->second;
+  }
+
+  /**
+   * @brief Update the cumulative probabilities.
+   *
+   * @note This function should be called upon if there are any updates,
+   *       additions or deletions to the individual probabilities.
+   * @note An efficient implementation should be written if there are large
+   *       number of objects.
+   **/
+  void updateCumulativeProbabilities() {
+    cumulative_probabilities_.clear();
+    if (individual_probabilities_.empty()) {
+      // No need to modify the cumulative probabilities.
+      return;
+    }
+    float cumulative_probability = 0;
+    for (const auto property_probability_pair : individual_probabilities_) {
+      cumulative_probabilities_.emplace_back(property_probability_pair.first,
+                                             cumulative_probability);
+      cumulative_probability += property_probability_pair.second;
+    }
+    // Adjust the last cumulative probability manually to 1.0, so that floating
+    // addition related rounding issues are ignored.
+    cumulative_probabilities_.back().updateProbability(1.0);
+  }
+
+  /**
+   * @brief Return a randomly chosen property.
+   *
+   * @note The random number is uniformly chosen.
+   **/
+  inline const std::size_t pickRandomProperty() {
+    std::uniform_real_distribution<float> dist(0.0, 1.0);
+    const float chosen_probability = dist(mt_);
+    return getPropertyForProbability(chosen_probability);
+  }
+
+ private:
+  class ProbabilityInfo {
+   public:
+    ProbabilityInfo(const std::size_t property, const float probability)
+        : property_(property), probability_(probability) {
+      DCHECK_LE(probability, 1.0);
+    }
+
+    ProbabilityInfo(const ProbabilityInfo &other) = default;
+
+    ProbabilityInfo& operator=(const ProbabilityInfo &other) = default;
+
+    void updateProbability(const float new_probability) {
+      DCHECK_LE(new_probability, 1.0);
+      probability_ = new_probability;
+    }
+
+    std::size_t property_;
+    float probability_;
+  };
+
+  /**
+   * @brief Get a property for a given cumulative probability.
+   *
+   * @param key_cumulative_probability The input cumulative probability.
+   *
+   * @return The object that has a cumulative probability that is greater than
+   *         or equal to the input cumulative probability.
+   **/
+  inline const std::size_t getPropertyForProbability(
+      const float key_cumulative_probability) {
+    DCHECK(!cumulative_probabilities_.empty());
+    // It doesn't matter in which order the objects are arranged in the
+    // cumulative_probabilities_ vector.
+    ProbabilityInfo search_key(0, key_cumulative_probability);
+    const auto it = std::upper_bound(
+        cumulative_probabilities_.begin(),
+        cumulative_probabilities_.end(),
+        search_key,
+        [](const ProbabilityInfo &a, const ProbabilityInfo &b) {
+          return a.probability_ < b.probability_;
+        });
+    DCHECK(it != std::end(cumulative_probabilities_));
+    return it->property_;
+  }
+
+  std::unordered_map<std::size_t, float> individual_probabilities_;
+  std::vector<ProbabilityInfo> cumulative_probabilities_;
+
+  std::mt19937_64 mt_;
+
+  DISALLOW_COPY_AND_ASSIGN(ProbabilityStore);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_EXECUTION_PROBABILITY_STORE_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/73917bf8/query_execution/tests/ProbabilityStore_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_execution/tests/ProbabilityStore_unittest.cpp b/query_execution/tests/ProbabilityStore_unittest.cpp
new file mode 100644
index 0000000..e624557
--- /dev/null
+++ b/query_execution/tests/ProbabilityStore_unittest.cpp
@@ -0,0 +1,75 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin\u2014Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#include <cstddef>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "query_execution/ProbabilityStore.hpp"
+
+namespace quickstep {
+
+TEST(ProbabilityStoreTest, CountTest) {
+  ProbabilityStore store;
+  EXPECT_EQ(0u, store.getNumObjects());
+  const std::size_t kProperty = 0;
+  store.addProbability(kProperty, 0.5);
+  EXPECT_EQ(1u, store.getNumObjects());
+  store.removeObject(kProperty);
+  EXPECT_EQ(0u, store.getNumObjects());
+
+  std::vector<std::size_t> objects {3, 5, 7, 9};
+  std::vector<float> probabilities {0.2, 0.3, 0.4, 0.1};
+  store.addProbabilities(objects, probabilities);
+
+  EXPECT_EQ(objects.size(), store.getNumObjects());
+}
+
+TEST(ProbabilityStoreTest, IndividualProbabilityTest) {
+  ProbabilityStore store;
+  std::vector<std::size_t> objects {3, 5, 7, 9};
+  std::vector<float> probabilities {0.2, 0.3, 0.4, 0.1};
+  store.addProbabilities(objects, probabilities);
+
+  for (std::size_t object_num = 0; object_num < objects.size(); ++object_num) {
+    EXPECT_EQ(probabilities[object_num],
+              store.getIndividualProbability(objects[object_num]));
+  }
+}
+
+TEST(ProbabilityStoreTest, PickRandomPropertyTest) {
+  ProbabilityStore store;
+  std::vector<std::size_t> objects {3, 5, 7, 9};
+  std::vector<float> probabilities {0.2, 0.3, 0.4, 0.1};
+  store.addProbabilities(objects, probabilities);
+
+  const std::size_t kNumTrials = 10;
+  while (!objects.empty()) {
+    for (std::size_t trial_num = 0; trial_num < kNumTrials; ++trial_num) {
+      const std::size_t picked_property = store.pickRandomProperty();
+      const auto it = std::find(objects.begin(), objects.end(), picked_property);
+      EXPECT_TRUE(it != objects.end());
+    }
+    const std::size_t property_to_be_removed = objects.back();
+    store.removeObject(property_to_be_removed);
+    objects.pop_back();
+    EXPECT_EQ(objects.size(), store.getNumObjects());
+  }
+}
+
+}  // namespace quickstep