You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by hb...@apache.org on 2016/06/25 14:40:39 UTC
[06/11] incubator-quickstep git commit: Created a class for storing
probabilities.
Created a class for storing probabilities.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/1b24694b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/1b24694b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/1b24694b
Branch: refs/heads/scheduler++
Commit: 1b24694bf99665d7e151d8ded751a258de000276
Parents: 8e973b8
Author: Harshad Deshmukh <hb...@apache.org>
Authored: Tue Jun 21 11:45:46 2016 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Sat Jun 25 09:40:01 2016 -0500
----------------------------------------------------------------------
query_execution/CMakeLists.txt | 13 ++
query_execution/PolicyEnforcer.cpp | 1 +
query_execution/ProbabilityStore.hpp | 223 +++++++++++++++++++
.../tests/ProbabilityStore_unittest.cpp | 75 +++++++
4 files changed, 312 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b24694b/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index fcd4f48..18ae0da 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -36,6 +36,7 @@ add_library(quickstep_queryexecution_ExecutionStats ../empty_src.cpp ExecutionSt
add_library(quickstep_queryexecution_Foreman Foreman.cpp Foreman.hpp)
add_library(quickstep_queryexecution_ForemanLite ../empty_src.cpp ForemanLite.hpp)
add_library(quickstep_queryexecution_PolicyEnforcer PolicyEnforcer.cpp PolicyEnforcer.hpp)
+add_library(quickstep_queryexecution_ProbabilityStore ../empty_src.cpp ProbabilityStore.hpp)
add_library(quickstep_queryexecution_QueryContext QueryContext.cpp QueryContext.hpp)
add_library(quickstep_queryexecution_QueryContext_proto
${queryexecution_QueryContext_proto_srcs}
@@ -97,6 +98,7 @@ target_link_libraries(quickstep_queryexecution_PolicyEnforcer
glog
quickstep_queryexecution_ExecutionStats
quickstep_catalog_CatalogTypedefs
+ quickstep_queryexecution_ProbabilityStore
quickstep_queryexecution_QueryExecutionMessages_proto
quickstep_queryexecution_QueryExecutionTypedefs
quickstep_queryexecution_QueryManager
@@ -106,6 +108,9 @@ target_link_libraries(quickstep_queryexecution_PolicyEnforcer
quickstep_relationaloperators_WorkOrder
quickstep_utility_Macros
tmb)
+target_link_libraries(quickstep_queryexecution_ProbabilityStore
+ glog
+ quickstep_utility_Macros)
target_link_libraries(quickstep_queryexecution_QueryContext
glog
quickstep_catalog_CatalogDatabaseLite
@@ -252,6 +257,14 @@ if (ENABLE_DISTRIBUTED)
add_test(BlockLocator_unittest BlockLocator_unittest)
endif()
+add_executable(ProbabilityStore_unittest
+ "${CMAKE_CURRENT_SOURCE_DIR}/tests/ProbabilityStore_unittest.cpp")
+target_link_libraries(ProbabilityStore_unittest
+ gtest
+ gtest_main
+ quickstep_queryexecution_ProbabilityStore)
+add_test(ProbabilityStore_unittest ProbabilityStore_unittest)
+
add_executable(QueryManager_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/QueryManager_unittest.cpp")
target_link_libraries(QueryManager_unittest
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b24694b/query_execution/PolicyEnforcer.cpp
----------------------------------------------------------------------
diff --git a/query_execution/PolicyEnforcer.cpp b/query_execution/PolicyEnforcer.cpp
index 84aa86a..db7206b 100644
--- a/query_execution/PolicyEnforcer.cpp
+++ b/query_execution/PolicyEnforcer.cpp
@@ -25,6 +25,7 @@
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
+#include "query_execution/ProbabilityStore.hpp"
#include "query_execution/QueryExecutionMessages.pb.h"
#include "query_execution/QueryManager.hpp"
#include "query_execution/WorkerDirectory.hpp"
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b24694b/query_execution/ProbabilityStore.hpp
----------------------------------------------------------------------
diff --git a/query_execution/ProbabilityStore.hpp b/query_execution/ProbabilityStore.hpp
new file mode 100644
index 0000000..8343d24
--- /dev/null
+++ b/query_execution/ProbabilityStore.hpp
@@ -0,0 +1,223 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_EXECUTION_PROBABILITY_STORE_HPP_
+#define QUICKSTEP_QUERY_EXECUTION_PROBABILITY_STORE_HPP_
+
+#include <algorithm>
+#include <cstddef>
+#include <random>
+#include <unordered_map>
+#include <vector>
+
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/**
+ * @brief A class that stores the probabilities of objects. We use a field
+ * called "property" to identify each object.
+ **/
+class ProbabilityStore {
+ public:
+ /**
+ * @brief Constructor.
+ **/
+ ProbabilityStore()
+ : mt_(std::random_device()()) {}
+
+ /**
+ * @brief Get the number of objects in the store.
+ **/
+ const std::size_t getNumObjects() const {
+ DCHECK_EQ(individual_probabilities_.size(), cumulative_probabilities_.size());
+ return individual_probabilities_.size();
+ }
+
+ /**
+ * @brief Add individual (not cumulative) probability for a given object.
+ *
+ * @note This function leaves the cumulative probabilities in a consistent
+ * state. An alternative lazy implementation should be written if cost
+ * of calculating cumulative probabilities is high.
+ * @note This function may override previously written probability values.
+ *
+ * @param property The property of the given object.
+ * @param individual_probability The individual (not cumulative) probability
+ * of the given object.
+ **/
+ void addProbability(const std::size_t property,
+ const float individual_probability) {
+ individual_probabilities_[property] = individual_probability;
+ updateCumulativeProbabilities();
+ }
+
+ /**
+ * @brief Add individual (not cumulative) probabilities for given objects.
+ *
+ * @note This function leaves the cumulative probabilities in a consistent
+ * state. An alternative lazy implementation should be written if cost
+ * of calculating cumulative probabilities is high.
+ * @note This function may override previously written probability values.
+ *
+ * @param properties A vector of properties to be added.
+ * @param individual_probabilities The individual (not cumulative)
+ * probabilities of the given objects.
+ **/
+ void addProbabilities(const std::vector<std::size_t> &properties,
+ const std::vector<float> &individual_probabilities) {
+ DCHECK_EQ(properties.size(), individual_probabilities.size());
+ for (std::size_t i = 0; i < properties.size(); ++i) {
+ individual_probabilities_[properties[i]] = individual_probabilities[i];
+ }
+ updateCumulativeProbabilities();
+ }
+
+ /**
+ * @brief Update the probability of a given object to a new value.
+ *
+ * @param property The property of the object.
+ * @param new_individual_probability The new probability to be set.
+ **/
+ void updateProbability(const std::size_t property,
+ const float new_individual_probability) {
+ auto it = individual_probabilities_.find(property);
+ DCHECK(it != individual_probabilities_.end());
+ it->second = new_individual_probability;
+ updateCumulativeProbabilities();
+ }
+
+ /**
+ * @brief Remove an object from the store.
+ *
+ * @param property The property of the object to be removed.
+ **/
+ void removeObject(const std::size_t property) {
+ auto it = individual_probabilities_.find(property);
+ DCHECK(it != individual_probabilities_.end());
+ individual_probabilities_.erase(it);
+ updateCumulativeProbabilities();
+ }
+
+ /**
+ * @brief Get the individual probability (not cumulative) for an object.
+ *
+ * @param property The property of the object.
+ **/
+ const float getIndividualProbability(const std::size_t property) const {
+ const auto it = individual_probabilities_.find(property);
+ DCHECK(it != individual_probabilities_.end());
+ return it->second;
+ }
+
+ /**
+ * @brief Update the cumulative probabilities.
+ *
+ * @note This function should be called upon if there are any updates,
+ * additions or deletions to the individual probabilities.
+ * @note An efficient implementation should be written if there are large
+ * number of objects.
+ **/
+ void updateCumulativeProbabilities() {
+ cumulative_probabilities_.clear();
+ if (individual_probabilities_.empty()) {
+ // No need to modify the cumulative probabilities.
+ return;
+ }
+ float cumulative_probability = 0;
+ for (const auto property_probability_pair : individual_probabilities_) {
+ cumulative_probabilities_.emplace_back(property_probability_pair.first,
+ cumulative_probability);
+ cumulative_probability += property_probability_pair.second;
+ }
+ // Adjust the last cumulative probability manually to 1.0, so that floating
+ // addition related rounding issues are ignored.
+ cumulative_probabilities_.back().updateProbability(1.0);
+ }
+
+ /**
+ * @brief Return a randomly chosen property.
+ *
+ * @note The random number is uniformly chosen.
+ **/
+ inline const std::size_t pickRandomProperty() {
+ std::uniform_real_distribution<float> dist(0.0, 1.0);
+ const float chosen_probability = dist(mt_);
+ return getPropertyForProbability(chosen_probability);
+ }
+
+ private:
+ class ProbabilityInfo {
+ public:
+ ProbabilityInfo(const std::size_t property, const float probability)
+ : property_(property), probability_(probability) {
+ DCHECK_LE(probability, 1.0);
+ }
+
+ ProbabilityInfo(const ProbabilityInfo &other) = default;
+
+ ProbabilityInfo& operator=(const ProbabilityInfo &other) = default;
+
+ void updateProbability(const float new_probability) {
+ DCHECK_LE(new_probability, 1.0);
+ probability_ = new_probability;
+ }
+
+ std::size_t property_;
+ float probability_;
+ };
+
+ /**
+ * @brief Get a property for a given cumulative probability.
+ *
+ * @param key_cumulative_probability The input cumulative probability.
+ *
+ * @return The object that has a cumulative probability that is greater than
+ * or equal to the input cumulative probability.
+ **/
+ inline const std::size_t getPropertyForProbability(
+ const float key_cumulative_probability) {
+ DCHECK(!cumulative_probabilities_.empty());
+ // It doesn't matter in which order the objects are arranged in the
+ // cumulative_probabilities_ vector.
+ ProbabilityInfo search_key(0, key_cumulative_probability);
+ const auto it = std::upper_bound(
+ cumulative_probabilities_.begin(),
+ cumulative_probabilities_.end(),
+ search_key,
+ [](const ProbabilityInfo &a, const ProbabilityInfo &b) {
+ return a.probability_ < b.probability_;
+ });
+ DCHECK(it != std::end(cumulative_probabilities_));
+ return it->property_;
+ }
+
+ std::unordered_map<std::size_t, float> individual_probabilities_;
+ std::vector<ProbabilityInfo> cumulative_probabilities_;
+
+ std::mt19937_64 mt_;
+
+ DISALLOW_COPY_AND_ASSIGN(ProbabilityStore);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_EXECUTION_PROBABILITY_STORE_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/1b24694b/query_execution/tests/ProbabilityStore_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_execution/tests/ProbabilityStore_unittest.cpp b/query_execution/tests/ProbabilityStore_unittest.cpp
new file mode 100644
index 0000000..e624557
--- /dev/null
+++ b/query_execution/tests/ProbabilityStore_unittest.cpp
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include <cstddef>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "query_execution/ProbabilityStore.hpp"
+
+namespace quickstep {
+
+TEST(ProbabilityStoreTest, CountTest) {
+ ProbabilityStore store;
+ EXPECT_EQ(0u, store.getNumObjects());
+ const std::size_t kProperty = 0;
+ store.addProbability(kProperty, 0.5);
+ EXPECT_EQ(1u, store.getNumObjects());
+ store.removeObject(kProperty);
+ EXPECT_EQ(0u, store.getNumObjects());
+
+ std::vector<std::size_t> objects {3, 5, 7, 9};
+ std::vector<float> probabilities {0.2, 0.3, 0.4, 0.1};
+ store.addProbabilities(objects, probabilities);
+
+ EXPECT_EQ(objects.size(), store.getNumObjects());
+}
+
+TEST(ProbabilityStoreTest, IndividualProbabilityTest) {
+ ProbabilityStore store;
+ std::vector<std::size_t> objects {3, 5, 7, 9};
+ std::vector<float> probabilities {0.2, 0.3, 0.4, 0.1};
+ store.addProbabilities(objects, probabilities);
+
+ for (std::size_t object_num = 0; object_num < objects.size(); ++object_num) {
+ EXPECT_EQ(probabilities[object_num],
+ store.getIndividualProbability(objects[object_num]));
+ }
+}
+
+TEST(ProbabilityStoreTest, PickRandomPropertyTest) {
+ ProbabilityStore store;
+ std::vector<std::size_t> objects {3, 5, 7, 9};
+ std::vector<float> probabilities {0.2, 0.3, 0.4, 0.1};
+ store.addProbabilities(objects, probabilities);
+
+ const std::size_t kNumTrials = 10;
+ while (!objects.empty()) {
+ for (std::size_t trial_num = 0; trial_num < kNumTrials; ++trial_num) {
+ const std::size_t picked_property = store.pickRandomProperty();
+ const auto it = std::find(objects.begin(), objects.end(), picked_property);
+ EXPECT_TRUE(it != objects.end());
+ }
+ const std::size_t property_to_be_removed = objects.back();
+ store.removeObject(property_to_be_removed);
+ objects.pop_back();
+ EXPECT_EQ(objects.size(), store.getNumObjects());
+ }
+}
+
+} // namespace quickstep