You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/07/07 18:57:56 UTC
[6/8] incubator-quickstep git commit: Initial commit
Initial commit
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/10b25333
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/10b25333
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/10b25333
Branch: refs/heads/adaptive-bloom-filters
Commit: 10b25333b3916621acebe15e89f0407225fcc41d
Parents: 04c8224
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Sat Jun 11 23:14:00 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Jul 7 12:54:04 2016 -0500
----------------------------------------------------------------------
CMakeLists.txt | 2 +
catalog/CMakeLists.txt | 9 +
catalog/Catalog.proto | 5 +
catalog/CatalogRelation.cpp | 16 +-
catalog/CatalogRelationConstraints.cpp | 55 ++++++
catalog/CatalogRelationConstraints.hpp | 97 ++++++++++
catalog/CatalogRelationSchema.cpp | 15 ++
catalog/CatalogRelationSchema.hpp | 16 +-
cli/CommandExecutor.cpp | 25 ++-
cli/QuickstepCli.cpp | 49 ++++++
compression/CompressionDictionaryLite.hpp | 42 +++++
query_execution/CMakeLists.txt | 1 +
query_execution/Worker.cpp | 5 +
query_execution/tests/QueryManager_unittest.cpp | 4 +
query_optimizer/ExecutionHeuristics.cpp | 17 +-
query_optimizer/ExecutionHeuristics.hpp | 2 +-
query_optimizer/PhysicalGenerator.cpp | 2 +-
query_optimizer/cost_model/SimpleCostModel.cpp | 4 +-
.../StarSchemaHashJoinOrderOptimization.cpp | 1 +
relational_operators/AggregationOperator.hpp | 4 +
relational_operators/BuildHashOperator.hpp | 4 +
relational_operators/CreateIndexOperator.hpp | 4 +
relational_operators/CreateTableOperator.hpp | 4 +
relational_operators/DeleteOperator.hpp | 4 +
relational_operators/DestroyHashOperator.hpp | 4 +
relational_operators/DropTableOperator.hpp | 4 +
.../FinalizeAggregationOperator.hpp | 4 +
relational_operators/HashJoinOperator.hpp | 35 +++-
relational_operators/InsertOperator.hpp | 4 +
.../NestedLoopsJoinOperator.hpp | 4 +
relational_operators/RelationalOperator.hpp | 16 ++
relational_operators/SampleOperator.hpp | 4 +
relational_operators/SaveBlocksOperator.hpp | 4 +
relational_operators/SelectOperator.hpp | 4 +
relational_operators/SortMergeRunOperator.hpp | 4 +
.../SortRunGenerationOperator.hpp | 4 +
relational_operators/TableGeneratorOperator.hpp | 4 +
relational_operators/TextScanOperator.hpp | 4 +
relational_operators/UpdateOperator.hpp | 4 +
relational_operators/WorkOrder.hpp | 11 +-
storage/BasicColumnStoreValueAccessor.hpp | 26 ++-
storage/CMakeLists.txt | 2 +
storage/CompressedColumnStoreValueAccessor.hpp | 22 +++
.../CompressedPackedRowStoreValueAccessor.hpp | 22 +++
storage/PackedRowStoreValueAccessor.hpp | 25 ++-
storage/SplitRowStoreValueAccessor.hpp | 45 +++++
storage/ValueAccessor.hpp | 36 ++++
types/containers/ColumnVector.hpp | 35 ++++
types/containers/ColumnVectorsValueAccessor.hpp | 17 ++
utility/BloomFilterAdapter.hpp | 128 ++++++++++++++
utility/CMakeLists.txt | 17 ++
utility/DAGVisualizer.cpp | 167 ++++++++++++++++++
utility/DAGVisualizer.hpp | 85 +++++++++
utility/EventProfiler.cpp | 29 +++
utility/EventProfiler.hpp | 176 +++++++++++++++++++
55 files changed, 1279 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20e1fb9..ae85b75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -760,6 +760,8 @@ target_link_libraries(quickstep_cli_shell
quickstep_queryoptimizer_QueryProcessor
quickstep_storage_PreloaderThread
quickstep_threading_ThreadIDBasedMap
+ quickstep_utility_DAGVisualizer
+ quickstep_utility_EventProfiler
quickstep_utility_Macros
quickstep_utility_PtrVector
quickstep_utility_SqlError
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/catalog/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/catalog/CMakeLists.txt b/catalog/CMakeLists.txt
index 64b4f16..0f50706 100644
--- a/catalog/CMakeLists.txt
+++ b/catalog/CMakeLists.txt
@@ -35,6 +35,9 @@ add_library(quickstep_catalog_CatalogDatabaseCache CatalogDatabaseCache.cpp Cata
add_library(quickstep_catalog_CatalogDatabaseLite ../empty_src.cpp CatalogDatabaseLite.hpp)
add_library(quickstep_catalog_CatalogErrors ../empty_src.cpp CatalogErrors.hpp)
add_library(quickstep_catalog_CatalogRelation CatalogRelation.cpp CatalogRelation.hpp)
+add_library(quickstep_catalog_CatalogRelationConstraints
+ CatalogRelationConstraints.cpp
+ CatalogRelationConstraints.hpp)
add_library(quickstep_catalog_CatalogRelationSchema
CatalogRelationSchema.cpp
CatalogRelationSchema.hpp)
@@ -117,6 +120,10 @@ target_link_libraries(quickstep_catalog_CatalogRelation
quickstep_threading_SpinSharedMutex
quickstep_utility_Macros
quickstep_utility_PtrVector)
+target_link_libraries(quickstep_catalog_CatalogRelationConstraints
+ quickstep_catalog_CatalogTypedefs
+ quickstep_catalog_Catalog_proto
+ quickstep_utility_Macros)
target_link_libraries(quickstep_catalog_CatalogRelationStatistics
quickstep_catalog_CatalogTypedefs
quickstep_catalog_Catalog_proto
@@ -136,6 +143,7 @@ target_link_libraries(quickstep_catalog_CatalogRelationSchema
glog
quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogErrors
+ quickstep_catalog_CatalogRelationConstraints
quickstep_catalog_CatalogTypedefs
quickstep_catalog_Catalog_proto
quickstep_types_Type
@@ -182,6 +190,7 @@ target_link_libraries(quickstep_catalog
quickstep_catalog_CatalogDatabaseLite
quickstep_catalog_CatalogErrors
quickstep_catalog_CatalogRelation
+ quickstep_catalog_CatalogRelationConstraints
quickstep_catalog_CatalogRelationSchema
quickstep_catalog_CatalogRelationStatistics
quickstep_catalog_CatalogTypedefs
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/catalog/Catalog.proto
----------------------------------------------------------------------
diff --git a/catalog/Catalog.proto b/catalog/Catalog.proto
index ce4bc2e..a51172f 100644
--- a/catalog/Catalog.proto
+++ b/catalog/Catalog.proto
@@ -80,6 +80,10 @@ message IndexScheme {
repeated IndexEntry index_entries = 1;
}
+message CatalogRelationConstraints {
+ repeated int32 primary_key = 1;
+}
+
message CatalogRelationStatistics {
optional fixed64 num_tuples = 1;
@@ -96,6 +100,7 @@ message CatalogRelationSchema {
required bool temporary = 3;
repeated CatalogAttribute attributes = 4;
+ optional CatalogRelationConstraints constraints = 5;
extensions 16 to max;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/catalog/CatalogRelation.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelation.cpp b/catalog/CatalogRelation.cpp
index 01aebb5..682b6be 100644
--- a/catalog/CatalogRelation.cpp
+++ b/catalog/CatalogRelation.cpp
@@ -143,21 +143,7 @@ CatalogRelation::CatalogRelation(const serialization::CatalogRelationSchema &pro
}
serialization::CatalogRelationSchema CatalogRelation::getProto() const {
- serialization::CatalogRelationSchema proto;
-
- proto.set_relation_id(id_);
- proto.set_name(name_);
- proto.set_temporary(temporary_);
-
- for (PtrVector<CatalogAttribute, true>::const_iterator it = attr_vec_.begin();
- it != attr_vec_.end();
- ++it) {
- if (it.isNull()) {
- proto.add_attributes();
- } else {
- proto.add_attributes()->MergeFrom(it->getProto());
- }
- }
+ serialization::CatalogRelationSchema proto = CatalogRelationSchema::getProto();
proto.MutableExtension(serialization::CatalogRelation::default_layout)
->MergeFrom(getDefaultStorageBlockLayout().getDescription());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/catalog/CatalogRelationConstraints.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationConstraints.cpp b/catalog/CatalogRelationConstraints.cpp
new file mode 100644
index 0000000..4525a98
--- /dev/null
+++ b/catalog/CatalogRelationConstraints.cpp
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "catalog/CatalogRelationConstraints.hpp"
+
+#include "catalog/Catalog.pb.h"
+
+namespace quickstep {
+
+// Rebuilds the constraints from their serialized form. The primary key stays
+// unset (hasPrimaryKey() == false) when the proto lists no primary-key
+// attribute ids.
+CatalogRelationConstraints::CatalogRelationConstraints(
+    const serialization::CatalogRelationConstraints &proto) {
+  if (proto.primary_key_size() > 0) {
+    // Build the set straight from the repeated field's iterator range. This
+    // avoids indexing with a std::size_t counter that is compared against the
+    // int returned by primary_key_size() and passed to an int-indexed
+    // accessor.
+    primary_key_.reset(new std::set<attribute_id>(proto.primary_key().begin(),
+                                                  proto.primary_key().end()));
+  }
+}
+
+// Serializes the constraints. The returned message has an empty primary_key
+// field when no primary key is set; otherwise it lists every attribute id of
+// the primary key in the set's iteration order.
+serialization::CatalogRelationConstraints CatalogRelationConstraints::getProto() const {
+  serialization::CatalogRelationConstraints serialized;
+  if (primary_key_ == nullptr) {
+    return serialized;
+  }
+  for (auto it = primary_key_->begin(); it != primary_key_->end(); ++it) {
+    serialized.add_primary_key(*it);
+  }
+  return serialized;
+}
+
+// Checks that every primary-key entry in the proto is a usable attribute
+// index, i.e. lies in [0, num_attributes). Returns true for a proto with no
+// primary-key entries.
+bool CatalogRelationConstraints::ProtoIsValid(
+    const serialization::CatalogRelationConstraints &proto,
+    const std::size_t num_attributes) {
+  // primary_key_size() and the indexed accessor both work with int, so use an
+  // int counter rather than std::size_t.
+  for (int i = 0; i < proto.primary_key_size(); ++i) {
+    const auto attr_id = proto.primary_key(i);
+    // Reject negative ids explicitly instead of relying on the implicit
+    // signed-to-unsigned conversion in a mixed-sign comparison.
+    if (attr_id < 0 || static_cast<std::size_t>(attr_id) >= num_attributes) {
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/catalog/CatalogRelationConstraints.hpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationConstraints.hpp b/catalog/CatalogRelationConstraints.hpp
new file mode 100644
index 0000000..135ccb9
--- /dev/null
+++ b/catalog/CatalogRelationConstraints.hpp
@@ -0,0 +1,97 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_CATALOG_CATALOG_RELATION_CONSTRAINTS_HPP_
+#define QUICKSTEP_CATALOG_CATALOG_RELATION_CONSTRAINTS_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <set>
+#include <utility>
+
+#include "catalog/Catalog.pb.h"
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Catalog
+ * @{
+ */
+
+/**
+ * @brief Constraints on a catalog relation.
+ **/
+class CatalogRelationConstraints {
+ public:
+ /**
+ * @brief Constructor. Creates a constraints object with no primary key set.
+ **/
+ CatalogRelationConstraints() {}
+
+ /**
+ * @brief Reconstruct a CatalogRelationConstraints object from its serialized
+ * Protocol Buffer form.
+ *
+ * @param proto The Protocol Buffer serialization of a CatalogRelationConstraints
+ * object, previously produced by getProto().
+ **/
+ explicit CatalogRelationConstraints(const serialization::CatalogRelationConstraints &proto);
+
+ /**
+ * @brief Serialize the CatalogRelationConstraints object as Protocol Buffer.
+ *
+ * @return The Protocol Buffer representation of the CatalogRelationConstraints
+ * object.
+ **/
+ serialization::CatalogRelationConstraints getProto() const;
+
+ /**
+ * @brief Check whether a serialized CatalogRelationConstraints is consistent
+ * with a relation that has the given number of attributes.
+ *
+ * @param proto The Protocol Buffer serialization to check.
+ * @param num_attributes The number of attributes in the owning relation.
+ * @return false if any primary-key entry in proto is not smaller than
+ * num_attributes, true otherwise.
+ **/
+ static bool ProtoIsValid(const serialization::CatalogRelationConstraints &proto,
+ const std::size_t num_attributes);
+
+ /**
+ * @brief Check whether a primary key has been set.
+ *
+ * @return true if a primary key is currently set, false otherwise.
+ **/
+ bool hasPrimaryKey() const {
+ return (primary_key_ != nullptr);
+ }
+
+ /**
+ * @brief Get the attribute ids that make up the primary key.
+ *
+ * @return Pointer to the set of primary-key attribute ids (still owned by
+ * this object), or nullptr if no primary key is set.
+ **/
+ const std::set<attribute_id>* getPrimaryKey() const {
+ return primary_key_.get();
+ }
+
+ /**
+ * @brief Set (or replace) the primary key.
+ *
+ * @param primary_key A container of attribute ids providing empty(), begin()
+ * and end(). Must not be empty (enforced with CHECK). Its elements
+ * are copied into a newly allocated set.
+ **/
+ template <typename IterableT>
+ void setPrimaryKey(IterableT &&primary_key) {
+ CHECK(!primary_key.empty());
+ primary_key_.reset(
+ new std::set<attribute_id>(primary_key.begin(), primary_key.end()));
+ }
+
+ /**
+ * @brief Remove the primary key, if any. Afterwards hasPrimaryKey() returns
+ * false.
+ **/
+ void removePrimaryKey() {
+ primary_key_.reset();
+ }
+
+ private:
+ // Attribute ids of the primary key; null when no primary key is set.
+ std::unique_ptr<std::set<attribute_id>> primary_key_;
+
+ DISALLOW_COPY_AND_ASSIGN(CatalogRelationConstraints);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_CATALOG_CATALOG_RELATION_CONSTRAINTS_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/catalog/CatalogRelationSchema.cpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationSchema.cpp b/catalog/CatalogRelationSchema.cpp
index 97c834f..bf8217d 100644
--- a/catalog/CatalogRelationSchema.cpp
+++ b/catalog/CatalogRelationSchema.cpp
@@ -27,6 +27,7 @@
#include "catalog/Catalog.pb.h"
#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogErrors.hpp"
+#include "catalog/CatalogRelationConstraints.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "types/Type.hpp"
#include "utility/PtrVector.hpp"
@@ -70,6 +71,12 @@ CatalogRelationSchema::CatalogRelationSchema(const serialization::CatalogRelatio
attr_vec_.push_back(nullptr);
}
}
+
+ if (proto.has_constraints()) {
+ constraints_.reset(new CatalogRelationConstraints(proto.constraints()));
+ } else {
+ constraints_.reset(new CatalogRelationConstraints());
+ }
}
bool CatalogRelationSchema::ProtoIsValid(const serialization::CatalogRelationSchema &proto) {
@@ -84,6 +91,12 @@ bool CatalogRelationSchema::ProtoIsValid(const serialization::CatalogRelationSch
}
}
+ if (proto.has_constraints()
+ && !CatalogRelationConstraints::ProtoIsValid(proto.constraints(),
+ proto.attributes_size())) {
+ return false;
+ }
+
return true;
}
@@ -104,6 +117,8 @@ serialization::CatalogRelationSchema CatalogRelationSchema::getProto() const {
}
}
+ proto.mutable_constraints()->CopyFrom(constraints_->getProto());
+
return proto;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/catalog/CatalogRelationSchema.hpp
----------------------------------------------------------------------
diff --git a/catalog/CatalogRelationSchema.hpp b/catalog/CatalogRelationSchema.hpp
index d773bc7..0c6c207 100644
--- a/catalog/CatalogRelationSchema.hpp
+++ b/catalog/CatalogRelationSchema.hpp
@@ -21,12 +21,14 @@
#define QUICKSTEP_CATALOG_CATALOG_RELATION_SCHEMA_HPP_
#include <cstddef>
+#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "catalog/Catalog.pb.h"
#include "catalog/CatalogAttribute.hpp"
+#include "catalog/CatalogRelationConstraints.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
@@ -427,6 +429,14 @@ class CatalogRelationSchema {
return max_byte_lengths_;
}
+ const CatalogRelationConstraints& getConstraints() const {
+ return *constraints_;
+ }
+
+ CatalogRelationConstraints* getConstraintsMutable() {
+ return constraints_.get();
+ }
+
protected:
/**
* @brief Create a new relation.
@@ -456,7 +466,8 @@ class CatalogRelationSchema {
min_variable_byte_length_excluding_nullable_(0),
estimated_variable_byte_length_(0),
current_nullable_attribute_index_(-1),
- current_variable_length_attribute_index_(-1) {
+ current_variable_length_attribute_index_(-1),
+ constraints_(new CatalogRelationConstraints()) {
}
/**
@@ -532,6 +543,9 @@ class CatalogRelationSchema {
std::vector<int> variable_length_attribute_indices_;
int current_variable_length_attribute_index_;
+ // Primary key, foreign keys, etc.
+ std::unique_ptr<CatalogRelationConstraints> constraints_;
+
private:
friend class CatalogDatabase;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/cli/CommandExecutor.cpp
----------------------------------------------------------------------
diff --git a/cli/CommandExecutor.cpp b/cli/CommandExecutor.cpp
index 7083ef5..bff8c11 100644
--- a/cli/CommandExecutor.cpp
+++ b/cli/CommandExecutor.cpp
@@ -252,7 +252,8 @@ inline TypedValue executeQueryForSingleResult(
return value;
}
-void executeAnalyze(const tmb::client_id main_thread_client_id,
+void executeAnalyze(const PtrVector<ParseString> *arguments,
+ const tmb::client_id main_thread_client_id,
const tmb::client_id foreman_client_id,
MessageBus *bus,
QueryProcessor *query_processor,
@@ -261,8 +262,19 @@ void executeAnalyze(const tmb::client_id main_thread_client_id,
StorageManager *storage_manager = query_processor->getStorageManager();
std::unique_ptr<SqlParserWrapper> parser_wrapper(new SqlParserWrapper());
- std::vector<std::reference_wrapper<const CatalogRelation>> relations(
- database.begin(), database.end());
+ std::vector<std::reference_wrapper<const CatalogRelation>> relations;
+ if (arguments->size() == 0) {
+ relations.insert(relations.begin(), database.begin(), database.end());
+ } else {
+ for (const auto &rel_name : *arguments) {
+ const CatalogRelation *rel = database.getRelationByName(rel_name.value());
+ if (rel == nullptr) {
+ THROW_SQL_ERROR_AT(&rel_name) << "Table does not exist";
+ } else {
+ relations.emplace_back(*rel);
+ }
+ }
+ }
// Analyze each relation in the database.
for (const CatalogRelation &relation : relations) {
@@ -342,8 +354,11 @@ void executeCommand(const ParseStatement &statement,
executeDescribeTable(arguments, catalog_database, out);
}
} else if (command_str == C::kAnalyzeCommand) {
- executeAnalyze(
- main_thread_client_id, foreman_client_id, bus, query_processor, out);
+ executeAnalyze(arguments,
+ main_thread_client_id,
+ foreman_client_id,
+ bus,
+ query_processor, out);
} else {
THROW_SQL_ERROR_AT(command.command()) << "Invalid Command";
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index 02a55a0..24f053b 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -52,6 +52,9 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include <gperftools/profiler.h>
#endif
+#include "catalog/CatalogDatabase.hpp"
+#include "catalog/CatalogRelation.hpp"
+#include "catalog/CatalogRelationConstraints.hpp"
#include "cli/DefaultsConfigurator.hpp"
#include "cli/InputParserUtil.hpp"
#include "cli/PrintToScreen.hpp"
@@ -75,6 +78,8 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include "storage/PreloaderThread.hpp"
#include "threading/ThreadIDBasedMap.hpp"
+#include "utility/DAGVisualizer.hpp"
+#include "utility/EventProfiler.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
#include "utility/SqlError.hpp"
@@ -89,6 +94,8 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include "tmb/message_bus.h"
#include "tmb/message_style.h"
+#include "google/protobuf/text_format.h"
+
namespace quickstep {
class CatalogRelation;
}
@@ -185,9 +192,33 @@ DEFINE_string(profile_file_name, "",
// To put things in perspective, the first run is, in my experiments, about 5-10
// times more expensive than the average run. That means the query needs to be
// run at least a hundred times to make the impact of the first run small (< 5 %).
+DEFINE_string(profile_output, "",
+ "Output file name for writing the profiled events.");
+DEFINE_bool(visualize_dag, false,
+ "If true, visualize the execution plan DAG into a graph in DOT format.");
} // namespace quickstep
+// Registers hard-coded primary keys for the TPC-H tables listed below on the
+// matching relations of 'database'. A relation that is absent from the
+// database, or that lacks one of the listed key columns, is skipped with a
+// warning instead of dereferencing a null pointer.
+// NOTE(review): assumes getRelationByNameMutable() and getAttributeByName()
+// return nullptr for unknown names - confirm against the catalog classes.
+void addPrimaryKeyInfoForTPCHTables(quickstep::CatalogDatabase *database) {
+  const std::vector<std::pair<std::string, std::vector<std::string>>> rel_pkeys = {
+      { "region", { "r_regionkey" } },
+      { "nation", { "n_nationkey" } },
+      { "supplier", { "s_suppkey" } },
+      { "customer", { "c_custkey" } },
+      { "part", { "p_partkey" } },
+      { "partsupp", { "ps_partkey", "ps_suppkey" } },
+      { "orders", { "o_orderkey" } }
+  };
+  for (const auto &rel_pair : rel_pkeys) {
+    quickstep::CatalogRelation *rel = database->getRelationByNameMutable(rel_pair.first);
+    if (rel == nullptr) {
+      LOG(WARNING) << "Skipping primary key for missing relation " << rel_pair.first;
+      continue;
+    }
+
+    // Resolve every key column first so that a partially matching schema
+    // never ends up with a truncated primary key.
+    std::vector<quickstep::attribute_id> attrs;
+    bool all_columns_found = true;
+    for (const auto &pkey : rel_pair.second) {
+      const auto *attr = rel->getAttributeByName(pkey);
+      if (attr == nullptr) {
+        LOG(WARNING) << "Skipping primary key for relation " << rel_pair.first
+                     << ": missing attribute " << pkey;
+        all_columns_found = false;
+        break;
+      }
+      attrs.emplace_back(attr->getID());
+    }
+    if (all_columns_found) {
+      rel->getConstraintsMutable()->setPrimaryKey(attrs);
+    }
+  }
+}
+
int main(int argc, char* argv[]) {
google::InitGoogleLogging(argv[0]);
gflags::ParseCommandLineFlags(&argc, &argv, true);
@@ -295,6 +326,12 @@ int main(int argc, char* argv[]) {
LOG(FATAL) << "NON-STANDARD EXCEPTION DURING STARTUP";
}
+// addPrimaryKeyInfoForTPCHTables(query_processor->getDefaultDatabase());
+// std::string proto_str;
+// google::protobuf::TextFormat::PrintToString(
+// query_processor->getDefaultDatabase()->getProto(), &proto_str);
+// std::cerr << proto_str << "\n";
+
// Parse the CPU affinities for workers and the preloader thread, if enabled
// to warm up the buffer pool.
const vector<int> worker_cpu_affinities =
@@ -433,6 +470,8 @@ int main(int argc, char* argv[]) {
}
DCHECK(query_handle->getQueryPlanMutable() != nullptr);
+ quickstep::simple_profiler.clear();
+ quickstep::relop_profiler.clear();
start = std::chrono::steady_clock::now();
QueryExecutionUtil::ConstructAndSendAdmitRequestMessage(
main_thread_client_id,
@@ -445,6 +484,11 @@ int main(int argc, char* argv[]) {
main_thread_client_id, &bus);
end = std::chrono::steady_clock::now();
+ if (quickstep::FLAGS_visualize_dag) {
+ quickstep::DAGVisualizer visualizer(*query_handle->getQueryPlanMutable());
+ std::cerr << "\n" << visualizer.toDOT() << "\n";
+ }
+
const CatalogRelation *query_result_relation = query_handle->getQueryResultRelation();
if (query_result_relation) {
PrintToScreen::PrintRelation(*query_result_relation,
@@ -470,6 +514,11 @@ int main(int argc, char* argv[]) {
foreman.printWorkOrderProfilingResults(query_handle->query_id(),
stdout);
}
+ if (!quickstep::FLAGS_profile_output.empty()) {
+ std::ofstream ofs(quickstep::FLAGS_profile_output, std::ios::out);
+ quickstep::simple_profiler.writeToStream(ofs);
+ ofs.close();
+ }
} catch (const std::exception &e) {
fprintf(stderr, "QUERY EXECUTION ERROR: %s\n", e.what());
break;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/compression/CompressionDictionaryLite.hpp
----------------------------------------------------------------------
diff --git a/compression/CompressionDictionaryLite.hpp b/compression/CompressionDictionaryLite.hpp
index 45019c0..8c7741f 100644
--- a/compression/CompressionDictionaryLite.hpp
+++ b/compression/CompressionDictionaryLite.hpp
@@ -174,6 +174,15 @@ class CompressionDictionaryLite {
}
}
+ /**
+  * @brief Get an untyped pointer to the value represented by the specified
+  *        code, together with the value's length in bytes.
+  *
+  * @param code The compressed code to look up.
+  * @return The (pointer, byte length) pair for the value. When check_null is
+  *         true and code is the null code, the pair is (nullptr, 0).
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthForCode(const std::uint32_t code) const {
+   // Dispatch on the dictionary's storage layout.
+   return type_is_variable_length_
+       ? variableLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code)
+       : fixedLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code);
+ }
+
/**
* @brief Get the value represented by the specified code as a TypedValue.
* @note This version is for codes of 8 bits or less. Also see
@@ -255,6 +264,39 @@ class CompressionDictionaryLite {
return retval;
}
+ // Fixed-length lookup: returns a pointer to the value stored for 'code' and
+ // the fixed byte length of the type, or (nullptr, 0) when check_null is true
+ // and 'code' is the null code.
+ template <typename CodeType, bool check_null = true>
+ inline std::pair<const void*, std::size_t> fixedLengthGetUntypedValueAndByteLengthHelper(
+     const CodeType code) const {
+   if (check_null) {
+     if (code == getNullCode()) {
+       return std::pair<const void*, std::size_t>(nullptr, 0);
+     }
+   }
+   DCHECK_LT(code, numberOfCodes());
+
+   // The value array begins right after the two-word header; each entry
+   // occupies type_fixed_byte_length_ bytes.
+   const char *value_array =
+       static_cast<const char*>(dictionary_memory_) + 2 * sizeof(std::uint32_t);
+   const void *value_ptr = value_array + code * type_fixed_byte_length_;
+   return std::pair<const void*, std::size_t>(value_ptr, type_fixed_byte_length_);
+ }
+
+ // Variable-length lookup: returns a pointer to the value stored for 'code'
+ // and its byte length, or (nullptr, 0) when check_null is true and 'code' is
+ // the null code.
+ template <typename CodeType, bool check_null = true>
+ inline std::pair<const void*, std::size_t> variableLengthGetUntypedValueAndByteLengthHelper(
+     const CodeType code) const {
+   if (check_null && (code == getNullCode())) {
+     return std::make_pair(nullptr, 0);
+   }
+   DCHECK_LT(code, numberOfCodes());
+
+   // View the dictionary memory as 32-bit words: the per-code offsets start
+   // at word index 2.
+   const std::uint32_t *words = static_cast<const std::uint32_t*>(dictionary_memory_);
+   const char *memory_end = static_cast<const char*>(dictionary_memory_) + dictionary_memory_size_;
+
+   const std::uint32_t value_offset = words[code + 2];
+   const void *data_ptr = variable_length_data_region_ + value_offset;
+   DCHECK_LT(data_ptr, memory_end);
+
+   std::size_t data_size;
+   if (code == words[0] - 1) {
+     // Last code (presumably words[0] is the number of codes - confirm): the
+     // value runs to the end of the dictionary memory.
+     data_size = memory_end - static_cast<const char*>(data_ptr);
+   } else {
+     // Otherwise the value ends where the next code's value begins.
+     data_size = words[code + 3] - value_offset;
+   }
+   return std::make_pair(data_ptr, data_size);
+ }
+
template <typename CodeType>
inline TypedValue fixedLengthGetTypedValueHelper(const CodeType code) const {
if (code == getNullCode()) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index b031a44..5facbb0 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -186,6 +186,7 @@ target_link_libraries(quickstep_queryexecution_Worker
quickstep_threading_Thread
quickstep_threading_ThreadIDBasedMap
quickstep_threading_ThreadUtil
+ quickstep_utility_EventProfiler
quickstep_utility_Macros
tmb)
target_link_libraries(quickstep_queryexecution_WorkerDirectory
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_execution/Worker.cpp
----------------------------------------------------------------------
diff --git a/query_execution/Worker.cpp b/query_execution/Worker.cpp
index 6ba27f1..f94089f 100644
--- a/query_execution/Worker.cpp
+++ b/query_execution/Worker.cpp
@@ -29,6 +29,7 @@
#include "relational_operators/WorkOrder.hpp"
#include "threading/ThreadIDBasedMap.hpp"
#include "threading/ThreadUtil.hpp"
+#include "utility/EventProfiler.hpp"
#include "glog/logging.h"
@@ -116,8 +117,12 @@ void Worker::executeWorkOrderHelper(const TaggedMessage &tagged_message,
const size_t query_id_for_workorder = worker_message.getWorkOrder()->getQueryID();
// Start measuring the execution time.
+ auto *container = relop_profiler.getContainer();
+ auto *line = container->getEventLine(worker_message.getRelationalOpIndex());
start = std::chrono::steady_clock::now();
+ line->emplace_back();
worker_message.getWorkOrder()->execute();
+ line->back().endEvent();
end = std::chrono::steady_clock::now();
delete worker_message.getWorkOrder();
const uint64_t execution_time_microseconds =
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_execution/tests/QueryManager_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_execution/tests/QueryManager_unittest.cpp b/query_execution/tests/QueryManager_unittest.cpp
index 37e2cdd..dd3196b 100644
--- a/query_execution/tests/QueryManager_unittest.cpp
+++ b/query_execution/tests/QueryManager_unittest.cpp
@@ -105,6 +105,10 @@ class MockOperator: public RelationalOperator {
num_calls_donefeedingblocks_(0) {
}
+ // Returns the fixed name string identifying this mock operator.
+ std::string getName() const override {
+ return "MockOperator";
+ }
+
#define MOCK_OP_LOG(x) VLOG(x) << "Op[" << op_index_ << "]: " << __func__ << ": "
// The methods below are used to check whether QueryManager calls the Relational
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
index fc31c53..1f2163e 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -107,20 +107,9 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
const CatalogRelation *relation) {
- const std::size_t cardinality = relation->estimateTupleCardinality();
- if (cardinality < kOneThousand) {
- bloom_filter_proto->set_bloom_filter_size(kOneThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kVeryLowSparsityHash);
- } else if (cardinality < kTenThousand) {
- bloom_filter_proto->set_bloom_filter_size(kTenThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kLowSparsityHash);
- } else if (cardinality < kHundredThousand) {
- bloom_filter_proto->set_bloom_filter_size(kHundredThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kMediumSparsityHash);
- } else {
- bloom_filter_proto->set_bloom_filter_size(kMillion / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kHighSparsityHash);
- }
+ const std::size_t cardinality = relation->getStatistics().getNumTuples();
+ bloom_filter_proto->set_bloom_filter_size(cardinality);
+ bloom_filter_proto->set_number_of_hashes(3);
}
} // namespace optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp b/query_optimizer/ExecutionHeuristics.hpp
index 92a7fe8..c43d591 100644
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ b/query_optimizer/ExecutionHeuristics.hpp
@@ -49,7 +49,7 @@ class ExecutionHeuristics {
static const std::size_t kHundredThousand = 100000;
static const std::size_t kMillion = 1000000;
- static const std::size_t kCompressionFactor = 10;
+ static const std::size_t kCompressionFactor = 1;
static const std::size_t kVeryLowSparsityHash = 1;
static const std::size_t kLowSparsityHash = 2;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 75a7bc9..ee133b5 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -108,7 +108,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
DVLOG(4) << "Optimized physical plan:\n" << physical_plan_->toString();
if (FLAGS_visualize_plan) {
- quickstep::PlanVisualizer plan_visualizer;
+ quickstep::PlanVisualizer plan_visualizer;
std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_optimizer/cost_model/SimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp
index 48f76fa..8f31265 100644
--- a/query_optimizer/cost_model/SimpleCostModel.cpp
+++ b/query_optimizer/cost_model/SimpleCostModel.cpp
@@ -84,7 +84,7 @@ std::size_t SimpleCostModel::estimateCardinalityForTopLevelPlan(
std::size_t SimpleCostModel::estimateCardinalityForTableReference(
const P::TableReferencePtr &physical_plan) {
- return physical_plan->relation()->estimateTupleCardinality();
+ return physical_plan->relation()->getStatistics().getNumTuples();
}
std::size_t SimpleCostModel::estimateCardinalityForSelection(
@@ -115,7 +115,7 @@ std::size_t SimpleCostModel::estimateCardinalityForAggregate(
return 1;
}
return std::max(static_cast<std::size_t>(1),
- estimateCardinality(physical_plan->input()) / 10);
+ estimateCardinality(physical_plan->input()));
}
} // namespace cost
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 9770606..9357590 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -281,6 +281,7 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan(
// TODO(jianqiao): Cache the estimated cardinality for each plan in cost
// model to avoid duplicated estimation.
second_table_info->estimated_cardinality = cost_model_->estimateCardinality(output);
+// second_table_info->estimated_selectivity = cost_model_->estimateSelectivity(output);
second_table_info->join_attribute_pairs.insert(first_table_info->join_attribute_pairs.begin(),
first_table_info->join_attribute_pairs.end());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/AggregationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/AggregationOperator.hpp b/relational_operators/AggregationOperator.hpp
index 4bcbcf6..c46ba2c 100644
--- a/relational_operators/AggregationOperator.hpp
+++ b/relational_operators/AggregationOperator.hpp
@@ -77,6 +77,10 @@ class AggregationOperator : public RelationalOperator {
~AggregationOperator() override {}
+ std::string getName() const override {
+ return "AggregationOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/BuildHashOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/BuildHashOperator.hpp b/relational_operators/BuildHashOperator.hpp
index 464bbf8..952c7ac 100644
--- a/relational_operators/BuildHashOperator.hpp
+++ b/relational_operators/BuildHashOperator.hpp
@@ -93,6 +93,10 @@ class BuildHashOperator : public RelationalOperator {
~BuildHashOperator() override {}
+ std::string getName() const override {
+ return "BuildHashOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/CreateIndexOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/CreateIndexOperator.hpp b/relational_operators/CreateIndexOperator.hpp
index 18ca656..4e05448 100644
--- a/relational_operators/CreateIndexOperator.hpp
+++ b/relational_operators/CreateIndexOperator.hpp
@@ -69,6 +69,10 @@ class CreateIndexOperator : public RelationalOperator {
~CreateIndexOperator() override {}
+ std::string getName() const override {
+ return "CreateIndexOperator";
+ }
+
/**
* @note No WorkOrder generated for this operator.
**/
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/CreateTableOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/CreateTableOperator.hpp b/relational_operators/CreateTableOperator.hpp
index 6d91142..b7b707b 100644
--- a/relational_operators/CreateTableOperator.hpp
+++ b/relational_operators/CreateTableOperator.hpp
@@ -66,6 +66,10 @@ class CreateTableOperator : public RelationalOperator {
~CreateTableOperator() override {}
+ std::string getName() const override {
+ return "CreateTableOperator";
+ }
+
/**
* @note No WorkOrder generated for this operator.
**/
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/DeleteOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DeleteOperator.hpp b/relational_operators/DeleteOperator.hpp
index 74da8c1..abfe4a9 100644
--- a/relational_operators/DeleteOperator.hpp
+++ b/relational_operators/DeleteOperator.hpp
@@ -81,6 +81,10 @@ class DeleteOperator : public RelationalOperator {
~DeleteOperator() override {}
+ std::string getName() const override {
+ return "DeleteOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/DestroyHashOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DestroyHashOperator.hpp b/relational_operators/DestroyHashOperator.hpp
index 181386f..ae65de5 100644
--- a/relational_operators/DestroyHashOperator.hpp
+++ b/relational_operators/DestroyHashOperator.hpp
@@ -58,6 +58,10 @@ class DestroyHashOperator : public RelationalOperator {
~DestroyHashOperator() override {}
+ std::string getName() const override {
+ return "DestroyHashOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/DropTableOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DropTableOperator.hpp b/relational_operators/DropTableOperator.hpp
index 6c7fca3..f854b4f 100644
--- a/relational_operators/DropTableOperator.hpp
+++ b/relational_operators/DropTableOperator.hpp
@@ -74,6 +74,10 @@ class DropTableOperator : public RelationalOperator {
~DropTableOperator() override {}
+ std::string getName() const override {
+ return "DropTableOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/FinalizeAggregationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/FinalizeAggregationOperator.hpp b/relational_operators/FinalizeAggregationOperator.hpp
index 158a637..0dcfc9e 100644
--- a/relational_operators/FinalizeAggregationOperator.hpp
+++ b/relational_operators/FinalizeAggregationOperator.hpp
@@ -74,6 +74,10 @@ class FinalizeAggregationOperator : public RelationalOperator {
~FinalizeAggregationOperator() override {}
+ std::string getName() const override {
+ return "FinalizeAggregationOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index 5d3d7da..0b9e72b 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -157,6 +157,21 @@ class HashJoinOperator : public RelationalOperator {
~HashJoinOperator() override {}
+ std::string getName() const override {
+ switch (join_type_) {
+ case JoinType::kInnerJoin:
+ return "HashJoinOperator";
+ case JoinType::kLeftSemiJoin:
+ return "HashJoinOperator(LeftSemi)";
+ case JoinType::kLeftAntiJoin:
+ return "HashJoinOperator(LeftAnti)";
+ case JoinType::kLeftOuterJoin:
+ return "HashJoinOperator(LeftOuter)";
+ default: break;
+ }
+ LOG(FATAL) << "Unknown join type in HashJoinOperator::getName()";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
@@ -283,8 +298,9 @@ class HashInnerJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(join_key_attributes),
@@ -411,8 +427,9 @@ class HashSemiJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(join_key_attributes),
@@ -458,8 +475,9 @@ class HashSemiJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(std::move(join_key_attributes)),
@@ -535,8 +553,9 @@ class HashAntiJoinWorkOrder : public WorkOrder {
const std::vector<std::unique_ptr<const Scalar>> &selection,
const JoinHashTable &hash_table,
InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
+ StorageManager *storage_manager,
+ const int op_index = -1)
+ : WorkOrder(query_id, op_index),
build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(join_key_attributes),
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/InsertOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/InsertOperator.hpp b/relational_operators/InsertOperator.hpp
index 78f5199..2c6aca7 100644
--- a/relational_operators/InsertOperator.hpp
+++ b/relational_operators/InsertOperator.hpp
@@ -73,6 +73,10 @@ class InsertOperator : public RelationalOperator {
~InsertOperator() override {}
+ std::string getName() const override {
+ return "InsertOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/NestedLoopsJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/NestedLoopsJoinOperator.hpp b/relational_operators/NestedLoopsJoinOperator.hpp
index 992e76d..cf190fe 100644
--- a/relational_operators/NestedLoopsJoinOperator.hpp
+++ b/relational_operators/NestedLoopsJoinOperator.hpp
@@ -116,6 +116,10 @@ class NestedLoopsJoinOperator : public RelationalOperator {
~NestedLoopsJoinOperator() override {}
+ std::string getName() const override {
+ return "NestedLoopsJoinOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/RelationalOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/RelationalOperator.hpp b/relational_operators/RelationalOperator.hpp
index 116727b..65cd213 100644
--- a/relational_operators/RelationalOperator.hpp
+++ b/relational_operators/RelationalOperator.hpp
@@ -55,6 +55,13 @@ class RelationalOperator {
virtual ~RelationalOperator() {}
/**
+ * @brief Get the name of this relational operator.
+ *
+ * @return The name of this relational operator.
+ */
+ virtual std::string getName() const = 0;
+
+ /**
* @brief Generate all the next WorkOrders for this RelationalOperator.
*
* @note If a RelationalOperator has blocking dependencies, it should not
@@ -226,6 +233,15 @@ class RelationalOperator {
op_index_ = operator_index;
}
+ /**
+ * @brief Get the index of this operator in the query plan DAG.
+ *
+ * @return The index of this operator in the query plan DAG.
+ */
+ std::size_t getOperatorIndex() const {
+ return op_index_;
+ }
+
protected:
/**
* @brief Constructor
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/SampleOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SampleOperator.hpp b/relational_operators/SampleOperator.hpp
index f8fe5f6..08f08c8 100644
--- a/relational_operators/SampleOperator.hpp
+++ b/relational_operators/SampleOperator.hpp
@@ -93,6 +93,10 @@ class SampleOperator : public RelationalOperator {
~SampleOperator() override {}
+ std::string getName() const override {
+ return "SampleOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/SaveBlocksOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SaveBlocksOperator.hpp b/relational_operators/SaveBlocksOperator.hpp
index 50032b6..ebc5ffc 100644
--- a/relational_operators/SaveBlocksOperator.hpp
+++ b/relational_operators/SaveBlocksOperator.hpp
@@ -64,6 +64,10 @@ class SaveBlocksOperator : public RelationalOperator {
~SaveBlocksOperator() override {}
+ std::string getName() const override {
+ return "SaveBlocksOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/SelectOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SelectOperator.hpp b/relational_operators/SelectOperator.hpp
index 0c10686..ee25886 100644
--- a/relational_operators/SelectOperator.hpp
+++ b/relational_operators/SelectOperator.hpp
@@ -189,6 +189,10 @@ class SelectOperator : public RelationalOperator {
~SelectOperator() override {}
+ std::string getName() const override {
+ return "SelectOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/SortMergeRunOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SortMergeRunOperator.hpp b/relational_operators/SortMergeRunOperator.hpp
index 177836f..9b07ad6 100644
--- a/relational_operators/SortMergeRunOperator.hpp
+++ b/relational_operators/SortMergeRunOperator.hpp
@@ -129,6 +129,10 @@ class SortMergeRunOperator : public RelationalOperator {
**/
~SortMergeRunOperator() {}
+ std::string getName() const override {
+ return "SortMergeRunOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/SortRunGenerationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/SortRunGenerationOperator.hpp b/relational_operators/SortRunGenerationOperator.hpp
index 96a3ce1..54c7feb 100644
--- a/relational_operators/SortRunGenerationOperator.hpp
+++ b/relational_operators/SortRunGenerationOperator.hpp
@@ -109,6 +109,10 @@ class SortRunGenerationOperator : public RelationalOperator {
~SortRunGenerationOperator() {}
+ std::string getName() const override {
+ return "SortRunGenerationOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/TableGeneratorOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TableGeneratorOperator.hpp b/relational_operators/TableGeneratorOperator.hpp
index 1b791a6..15e7052 100644
--- a/relational_operators/TableGeneratorOperator.hpp
+++ b/relational_operators/TableGeneratorOperator.hpp
@@ -76,6 +76,10 @@ class TableGeneratorOperator : public RelationalOperator {
~TableGeneratorOperator() override {}
+ std::string getName() const override {
+ return "TableGeneratorOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/TextScanOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.hpp b/relational_operators/TextScanOperator.hpp
index 1a62ded..6890d7d 100644
--- a/relational_operators/TextScanOperator.hpp
+++ b/relational_operators/TextScanOperator.hpp
@@ -134,6 +134,10 @@ class TextScanOperator : public RelationalOperator {
~TextScanOperator() override {}
+ std::string getName() const override {
+ return "TextScanOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/UpdateOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/UpdateOperator.hpp b/relational_operators/UpdateOperator.hpp
index 4471a17..d021844 100644
--- a/relational_operators/UpdateOperator.hpp
+++ b/relational_operators/UpdateOperator.hpp
@@ -94,6 +94,10 @@ class UpdateOperator : public RelationalOperator {
~UpdateOperator() override {}
+ std::string getName() const override {
+ return "UpdateOperator";
+ }
+
bool getAllWorkOrders(WorkOrdersContainer *container,
QueryContext *query_context,
StorageManager *storage_manager,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/relational_operators/WorkOrder.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.hpp b/relational_operators/WorkOrder.hpp
index df195cc..4eb6b3a 100644
--- a/relational_operators/WorkOrder.hpp
+++ b/relational_operators/WorkOrder.hpp
@@ -299,16 +299,23 @@ class WorkOrder {
return query_id_;
}
+ inline const int getOperatorIndex() const {
+ return op_index_;
+ }
+
protected:
/**
* @brief Constructor.
*
* @param query_id The ID of the query to which this WorkOrder belongs.
**/
- explicit WorkOrder(const std::size_t query_id)
- : query_id_(query_id) {}
+ explicit WorkOrder(const std::size_t query_id,
+ const int op_index = -1)
+ : query_id_(query_id),
+ op_index_(op_index) {}
const std::size_t query_id_;
+ const int op_index_;
// A vector of preferred NUMA node IDs where this workorder should be executed.
// These node IDs typically indicate the NUMA node IDs of the input(s) of the
// workorder. Derived classes should ensure that there are no duplicate entries
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/storage/BasicColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/BasicColumnStoreValueAccessor.hpp b/storage/BasicColumnStoreValueAccessor.hpp
index 759e187..7907fd5 100644
--- a/storage/BasicColumnStoreValueAccessor.hpp
+++ b/storage/BasicColumnStoreValueAccessor.hpp
@@ -18,6 +18,8 @@
#ifndef QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_
#define QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_
+#include <cstddef>
+#include <utility>
#include <vector>
#include "catalog/CatalogRelationSchema.hpp"
@@ -43,7 +45,8 @@ class BasicColumnStoreValueAccessorHelper {
: relation_(relation),
num_tuples_(num_tuples),
column_stripes_(column_stripes),
- column_null_bitmaps_(column_null_bitmaps) {
+ column_null_bitmaps_(column_null_bitmaps),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()) {
}
inline tuple_id numPackedTuples() const {
@@ -61,9 +64,23 @@ class BasicColumnStoreValueAccessorHelper {
return nullptr;
}
- // TODO(chasseur): Consider cacheing the byte lengths of attributes.
- return static_cast<const char*>(column_stripes_[attr])
- + (tuple * relation_.getAttributeById(attr)->getType().maximumByteLength());
+ return static_cast<const char*>(column_stripes_[attr]) + (tuple * attr_max_lengths_[attr]);
+ }
+
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ DEBUG_ASSERT(tuple < num_tuples_);
+ DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
+ if (check_null
+ && (!column_null_bitmaps_.elementIsNull(attr))
+ && column_null_bitmaps_[attr].getBit(tuple)) {
+ return std::make_pair(nullptr, 0);
+ }
+
+ const std::size_t attr_length = attr_max_lengths_[attr];
+ return std::make_pair(static_cast<const char*>(column_stripes_[attr]) + (tuple * attr_length),
+ attr_length);
}
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
@@ -80,6 +97,7 @@ class BasicColumnStoreValueAccessorHelper {
const tuple_id num_tuples_;
const std::vector<void*> &column_stripes_;
const PtrVector<BitVector<false>, true> &column_null_bitmaps_;
+ const std::vector<std::size_t> &attr_max_lengths_;
DISALLOW_COPY_AND_ASSIGN(BasicColumnStoreValueAccessorHelper);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index b536411..38bc507 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -669,6 +669,8 @@ target_link_libraries(quickstep_storage_HashTable
quickstep_types_Type
quickstep_types_TypedValue
quickstep_utility_BloomFilter
+ quickstep_utility_BloomFilterAdapter
+ quickstep_utility_EventProfiler
quickstep_utility_HashPair
quickstep_utility_Macros)
target_link_libraries(quickstep_storage_HashTableBase
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/storage/CompressedColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedColumnStoreValueAccessor.hpp b/storage/CompressedColumnStoreValueAccessor.hpp
index 64eb315..984dea3 100644
--- a/storage/CompressedColumnStoreValueAccessor.hpp
+++ b/storage/CompressedColumnStoreValueAccessor.hpp
@@ -52,6 +52,7 @@ class CompressedColumnStoreValueAccessorHelper {
const PtrVector<BitVector<false>, true> &uncompressed_column_null_bitmaps)
: relation_(relation),
num_tuples_(num_tuples),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
compression_info_(compression_info),
dictionary_coded_attributes_(dictionary_coded_attributes),
truncated_attributes_(truncated_attributes),
@@ -84,6 +85,26 @@ class CompressedColumnStoreValueAccessorHelper {
}
}
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ if (dictionary_coded_attributes_[attr]) {
+ return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>(
+ getCode(tuple, attr));
+ } else if (truncated_attributes_[attr]) {
+ if (truncated_attribute_is_int_[attr]) {
+ int_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&int_buffer_, sizeof(int_buffer_));
+ } else {
+ long_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&long_buffer_, sizeof(long_buffer_));
+ }
+ } else {
+ return std::make_pair(getAttributePtr<check_null>(tuple, attr),
+ attr_max_lengths_[attr]);
+ }
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
if (dictionary_coded_attributes_[attr]) {
@@ -138,6 +159,7 @@ class CompressedColumnStoreValueAccessorHelper {
const CatalogRelationSchema &relation_;
const tuple_id num_tuples_;
+ const std::vector<std::size_t> &attr_max_lengths_;
const CompressedBlockInfo &compression_info_;
const std::vector<bool> &dictionary_coded_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/storage/CompressedPackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedPackedRowStoreValueAccessor.hpp b/storage/CompressedPackedRowStoreValueAccessor.hpp
index 024b0ec..7058aec 100644
--- a/storage/CompressedPackedRowStoreValueAccessor.hpp
+++ b/storage/CompressedPackedRowStoreValueAccessor.hpp
@@ -58,6 +58,7 @@ class CompressedPackedRowStoreValueAccessorHelper {
num_tuples_(num_tuples),
tuple_length_bytes_(tuple_length_bytes),
attribute_offsets_(attribute_offsets),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
compression_info_(compression_info),
dictionary_coded_attributes_(dictionary_coded_attributes),
truncated_attributes_(truncated_attributes),
@@ -92,6 +93,26 @@ class CompressedPackedRowStoreValueAccessorHelper {
}
}
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ if (dictionary_coded_attributes_[attr]) {
+ return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>(
+ getCode(tuple, attr));
+ } else if (truncated_attributes_[attr]) {
+ if (truncated_attribute_is_int_[attr]) {
+ int_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&int_buffer_, sizeof(int_buffer_));
+ } else {
+ long_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&long_buffer_, sizeof(long_buffer_));
+ }
+ } else {
+ return std::make_pair(getAttributePtr<check_null>(tuple, attr),
+ attr_max_lengths_[attr]);
+ }
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
if (dictionary_coded_attributes_[attr]) {
@@ -150,6 +171,7 @@ class CompressedPackedRowStoreValueAccessorHelper {
const tuple_id num_tuples_;
const std::size_t tuple_length_bytes_;
const std::vector<std::size_t> &attribute_offsets_;
+ const std::vector<std::size_t> &attr_max_lengths_;
const CompressedBlockInfo &compression_info_;
const std::vector<bool> &dictionary_coded_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/storage/PackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/PackedRowStoreValueAccessor.hpp b/storage/PackedRowStoreValueAccessor.hpp
index 03a975e..cbd273e 100644
--- a/storage/PackedRowStoreValueAccessor.hpp
+++ b/storage/PackedRowStoreValueAccessor.hpp
@@ -18,6 +18,8 @@
#ifndef QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_
#define QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_
+#include <utility>
+
#include "catalog/CatalogRelationSchema.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "storage/StorageBlockInfo.hpp"
@@ -40,7 +42,8 @@ class PackedRowStoreValueAccessorHelper {
: relation_(relation),
num_tuples_(num_tuples),
tuple_storage_(tuple_storage),
- null_bitmap_(null_bitmap) {
+ null_bitmap_(null_bitmap),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()) {
}
inline tuple_id numPackedTuples() const {
@@ -65,6 +68,25 @@ class PackedRowStoreValueAccessorHelper {
+ relation_.getFixedLengthAttributeOffset(attr); // Attribute offset within tuple.
}
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ DEBUG_ASSERT(tuple < num_tuples_);
+ DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
+ if (check_null) {
+ const int nullable_idx = relation_.getNullableAttributeIndex(attr);
+ if ((nullable_idx != -1)
+ && null_bitmap_->getBit(tuple * relation_.numNullableAttributes() + nullable_idx)) {
+ return std::make_pair(nullptr, 0);
+ }
+ }
+
+ return std::make_pair(static_cast<const char*>(tuple_storage_)
+ + (tuple * relation_.getFixedByteLength())
+ + relation_.getFixedLengthAttributeOffset(attr),
+ attr_max_lengths_[attr]);
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
const Type &attr_type = relation_.getAttributeById(attr)->getType();
@@ -79,6 +101,7 @@ class PackedRowStoreValueAccessorHelper {
const tuple_id num_tuples_;
const void *tuple_storage_;
const BitVector<false> *null_bitmap_;
+ const std::vector<std::size_t> &attr_max_lengths_;
DISALLOW_COPY_AND_ASSIGN(PackedRowStoreValueAccessorHelper);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/storage/SplitRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreValueAccessor.hpp b/storage/SplitRowStoreValueAccessor.hpp
index 9ea1a3a..19937f2 100644
--- a/storage/SplitRowStoreValueAccessor.hpp
+++ b/storage/SplitRowStoreValueAccessor.hpp
@@ -100,6 +100,11 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_position_);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_position_);
}
@@ -140,6 +145,44 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
}
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ DEBUG_ASSERT(occupancy_bitmap_.getBit(tid));
+ DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id));
+ const char *tuple_slot = static_cast<const char*>(tuple_storage_)
+ + tuple_slot_bytes_ * tid;
+ if (check_null) {
+ const int nullable_idx = relation_.getNullableAttributeIndex(attr_id);
+ if (nullable_idx != -1) {
+ // const_cast is safe here. We will only be using read-only methods of
+ // BitVector.
+ BitVector<true> tuple_null_bitmap(const_cast<void*>(static_cast<const void*>(tuple_slot)),
+ relation_.numNullableAttributes());
+ if (tuple_null_bitmap.getBit(nullable_idx)) {
+ return std::make_pair(nullptr, 0);
+ }
+ }
+ }
+
+ const int variable_length_idx = relation_.getVariableLengthAttributeIndex(attr_id);
+ if (variable_length_idx == -1) {
+ // Fixed-length, stored in-line in slot.
+ return std::make_pair(tuple_slot + per_tuple_null_bitmap_bytes_
+ + relation_.getFixedLengthAttributeOffset(attr_id),
+ attr_max_lengths_[attr_id]);
+
+ } else {
+ // Variable-length, stored at back of block.
+ const std::uint32_t *pos_ptr = reinterpret_cast<const std::uint32_t*>(
+ tuple_slot + per_tuple_null_bitmap_bytes_
+ + relation_.getFixedByteLength()
+ + variable_length_idx * 2 * sizeof(std::uint32_t));
+ return std::make_pair(static_cast<const char*>(tuple_storage_) + pos_ptr[0],
+ pos_ptr[1]);
+ }
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
DEBUG_ASSERT(occupancy_bitmap_.getBit(tid));
@@ -317,6 +360,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
tuple_storage_(tuple_storage),
tuple_slot_bytes_(tuple_slot_bytes),
per_tuple_null_bitmap_bytes_(per_tuple_null_bitmap_bytes),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
current_position_(std::numeric_limits<std::size_t>::max()) {
}
@@ -327,6 +371,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
const void *tuple_storage_;
const std::size_t tuple_slot_bytes_;
const std::size_t per_tuple_null_bitmap_bytes_;
+ const std::vector<std::size_t> &attr_max_lengths_;
std::size_t current_position_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/storage/ValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp
index e2a898e..e9370cc 100644
--- a/storage/ValueAccessor.hpp
+++ b/storage/ValueAccessor.hpp
@@ -375,6 +375,11 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
return accessor_->template getUntypedValueAtAbsolutePosition<check_null>(attr_id, *current_position_);
}
+ /**
+  * @brief Fetch both the untyped pointer and the byte length of an
+  *        attribute's value for the tuple at this adapter's current position.
+  *
+  * @param attr_id The attribute to read.
+  * @return Whatever the wrapped accessor's
+  *         getUntypedValueAndByteLengthAtAbsolutePosition() returns for the
+  *         tuple ID the adapter currently points to.
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+   // Resolve the adapter's cursor to a tuple ID, then delegate.
+   const tuple_id underlying_tid = *current_position_;
+   return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, underlying_tid);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, *current_position_);
}
@@ -387,6 +392,13 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
}
// Pass-through.
+ /**
+  * @brief Forward an absolute-position lookup of a value pointer and its
+  *        byte length straight to the wrapped accessor.
+  *
+  * @param attr_id The attribute to read.
+  * @param tid The tuple ID, handed to the wrapped accessor unchanged.
+  * @return The pair produced by the wrapped accessor.
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(
+     const attribute_id attr_id,
+     const tuple_id tid) const {
+   std::pair<const void*, std::size_t> value_and_byte_length =
+       accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, tid);
+   return value_and_byte_length;
+ }
+
+ // Pass-through.
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, tid);
@@ -560,6 +572,12 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
id_sequence_[current_position_]);
}
+ /**
+  * @brief Fetch both the untyped pointer and the byte length of an
+  *        attribute's value for the tuple at the current position of the
+  *        ID sequence.
+  *
+  * @param attr_id The attribute to read.
+  * @return The pair produced by the wrapped accessor for
+  *         id_sequence_[current_position_].
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+   // Translate the position in the sequence into the underlying tuple ID.
+   const tuple_id underlying_tid = id_sequence_[current_position_];
+   return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, underlying_tid);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, id_sequence_[current_position_]);
}
@@ -571,6 +589,13 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
"OrderedTupleIdSequenceAdapterValueAccessor");
}
+ /**
+  * @brief Absolute-position lookup of a value pointer and byte length.
+  *        Not supported by this adapter: the body unconditionally invokes
+  *        FATAL_ERROR and never produces a pair.
+  *
+  * @param attr_id Unused.
+  * @param tid Unused.
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ FATAL_ERROR("getUntypedValueAndByteLengthAtAbsolutePosition() not implemented in "
+ "OrderedTupleIdSequenceAdapterValueAccessor");
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
FATAL_ERROR("getTypedValueAtAbsolutePosition() not implemented in "
@@ -737,6 +762,11 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_tuple_);
}
+ /**
+  * @brief Fetch both the untyped pointer and the byte length of an
+  *        attribute's value for the tuple this accessor is currently on.
+  *
+  * @param attr_id The attribute to read.
+  * @return The result of the absolute-position lookup at current_tuple_.
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+   const tuple_id tid = current_tuple_;
+   return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, tid);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_tuple_);
}
@@ -747,6 +777,12 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return helper_.template getAttributeValue<check_null>(tid, attr_id);
}
+ /**
+  * @brief Look up the untyped value pointer and byte length of an attribute
+  *        for a specific tuple by delegating to the helper object.
+  *
+  * @param attr_id The attribute to read.
+  * @param tid The tuple to read from.
+  * @return The pair returned by helper_'s getAttributeValueAndByteLength().
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(
+     const attribute_id attr_id,
+     const tuple_id tid) const {
+   // Note the helper takes its arguments in (tuple, attribute) order.
+   std::pair<const void*, std::size_t> value_and_byte_length =
+       helper_.template getAttributeValueAndByteLength<check_null>(tid, attr_id);
+   return value_and_byte_length;
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
return helper_.getAttributeValueTyped(tid, attr_id);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/types/containers/ColumnVector.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVector.hpp b/types/containers/ColumnVector.hpp
index 76968ba..a9349ee 100644
--- a/types/containers/ColumnVector.hpp
+++ b/types/containers/ColumnVector.hpp
@@ -193,6 +193,22 @@ class NativeColumnVector : public ColumnVector {
}
/**
+ * @brief Get the untyped pointer to a value as well as the value's byte length
+ * in this NativeColumnVector as a pair.
+ *
+ * @param position The position of the value to get.
+ * @return A pair containing the untyped pointer to the value at position and
+ * the value's byte length.
+ **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const {
+   DCHECK_LT(position, actual_length_);
+   // A set bit in the (optional) null bitmap means there is no value to
+   // point at; report a null pointer with zero length.
+   if (check_null && null_bitmap_ && null_bitmap_->getBit(position)) {
+     return std::make_pair(nullptr, 0);
+   }
+   // Values are laid out back to back, type_length_ bytes each.
+   const char *value_ptr = static_cast<const char*>(values_) + (position * type_length_);
+   return std::make_pair(value_ptr, type_length_);
+ }
+
+ /**
* @brief Get a value in this NativeColumnVector as a TypedValue.
*
* @param position The position of the value to get.
@@ -453,6 +469,25 @@ class IndirectColumnVector : public ColumnVector {
}
/**
+ * @brief Get the untyped pointer to a value as well as the value's byte length
+ * in this IndirectColumnVector as a pair.
+ *
+ * @param position The position of the value to get.
+ * @return A pair containing the untyped pointer to the value at position and
+ * the value's byte length.
+ **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const {
+   DCHECK_LT(position, values_.size());
+   const TypedValue &value = values_[position];
+   // Only consult isNull() when null checking is requested and the column's
+   // type is nullable.
+   if (check_null && type_is_nullable_ && value.isNull()) {
+     return std::make_pair(nullptr, 0);
+   }
+   return std::make_pair(value.getDataPtr(), value.getDataSize());
+ }
+
+ /**
* @brief Get a value in this IndirectColumnVector as a TypedValue.
*
* @param position The position of the value to get.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index f1d29a2..d69d1d8 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -124,6 +124,11 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);
}
+ /**
+  * @brief Fetch both the untyped pointer and the byte length of an
+  *        attribute's value for the tuple at the accessor's current position.
+  *
+  * @param attr_id The attribute (column) to read.
+  * @return The result of the absolute-position lookup at current_position_.
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+   const tuple_id tid = current_position_;
+   return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, tid);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_position_);
}
@@ -140,6 +145,18 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
}
}
+ /**
+  * @brief Look up the untyped value pointer and byte length stored in one
+  *        column at a given tuple position.
+  *
+  * @param attr_id The attribute (column) to read; must be in range.
+  * @param tid The position within the column; must be in range.
+  * @return The pair produced by the column's own
+  *         getUntypedValueAndByteLength().
+  **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(
+     const attribute_id attr_id,
+     const tuple_id tid) const {
+   DCHECK(attributeIdInRange(attr_id));
+   DCHECK(tupleIdInRange(tid));
+   // column_native_ records which concrete ColumnVector class each column
+   // is, so the downcast can be a static_cast.
+   if (!column_native_[attr_id]) {
+     return static_cast<const IndirectColumnVector&>(*columns_[attr_id]).getUntypedValueAndByteLength<check_null>(tid);
+   }
+   return static_cast<const NativeColumnVector&>(*columns_[attr_id]).getUntypedValueAndByteLength<check_null>(tid);
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
DCHECK(attributeIdInRange(attr_id));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/utility/BloomFilterAdapter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilterAdapter.hpp b/utility/BloomFilterAdapter.hpp
new file mode 100644
index 0000000..5deb275
--- /dev/null
+++ b/utility/BloomFilterAdapter.hpp
@@ -0,0 +1,128 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
+#define QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/BloomFilter.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+/**
+ * @brief Probes the current tuple of a value accessor against a group of
+ *        bloom filters. While probing, it can keep per-filter statistics and
+ *        move filters that reject more often towards the front of the probe
+ *        order.
+ **/
+class BloomFilterAdapter {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param bloom_filters The bloom filters to probe. Only the pointers are
+   *        stored; the filters themselves are not copied.
+   * @param attribute_ids For each bloom filter (same index), the IDs of the
+   *        attributes whose values are looked up in that filter. Must have
+   *        the same size as bloom_filters. The IDs are copied, so the
+   *        argument does not need to outlive the adapter.
+   **/
+  BloomFilterAdapter(const std::vector<const BloomFilter*> &bloom_filters,
+                     const std::vector<std::vector<attribute_id>> &attribute_ids)
+      : num_bloom_filters_(bloom_filters.size()) {
+    DCHECK_EQ(bloom_filters.size(), attribute_ids.size());
+
+    bloom_filter_entries_.reserve(num_bloom_filters_);
+    bloom_filter_entry_indices_.reserve(num_bloom_filters_);
+
+    // Initially the probe order is simply the order the filters were given.
+    for (std::size_t i = 0; i < num_bloom_filters_; ++i) {
+      bloom_filter_entries_.emplace_back(bloom_filters[i], attribute_ids[i]);
+      bloom_filter_entry_indices_.emplace_back(i);
+    }
+  }
+
+  /**
+   * @brief Check whether the accessor's current tuple is rejected by any of
+   *        the bloom filters, updating the adaptive probe order.
+   *
+   * @param accessor A value accessor providing
+   *        getUntypedValueAndByteLength(attribute_id).
+   * @return true if some filter does not contain one of its attribute
+   *         values, false if every lookup hit.
+   **/
+  template <typename ValueAccessorT>
+  inline bool miss(const ValueAccessorT *accessor) {
+    return missImpl<ValueAccessorT, true>(accessor);
+  }
+
+  /**
+   * @brief Implementation of miss().
+   *
+   * @tparam adapt_filters If true, per-filter probe/miss counters are
+   *         maintained and a filter that misses may be swapped one slot
+   *         forward in the probe order. If false, the counters and the order
+   *         are left untouched.
+   * @param accessor A value accessor providing
+   *        getUntypedValueAndByteLength(attribute_id).
+   * @return true on the first lookup that is not contained in its filter,
+   *         false if all lookups hit.
+   **/
+  template <typename ValueAccessorT, bool adapt_filters>
+  inline bool missImpl(const ValueAccessorT *accessor) {
+    for (std::size_t i = 0; i < num_bloom_filters_; ++i) {
+      const std::size_t entry_idx = bloom_filter_entry_indices_[i];
+      BloomFilterEntry &entry = bloom_filter_entries_[entry_idx];
+      if (adapt_filters) {
+        ++entry.cnt;
+      }
+
+      const BloomFilter *bloom_filter = entry.bloom_filter;
+      for (const attribute_id &attr_id : entry.attribute_ids) {
+        // NOTE(review): with the accessor's default null checking a NULL
+        // value arrives here as (nullptr, 0) and is passed to contains()
+        // as-is -- confirm that BloomFilter::contains() tolerates that.
+        const std::pair<const void*, std::size_t> value_and_byte_length =
+            accessor->getUntypedValueAndByteLength(attr_id);
+        if (!bloom_filter->contains(static_cast<const std::uint8_t*>(value_and_byte_length.first),
+                                    value_and_byte_length.second)) {
+          if (adapt_filters) {
+            // Record miss
+            ++entry.miss;
+
+            // Update entry order: promote this filter by one slot if its
+            // statistics now beat those of the filter probed just before it.
+            if (i > 0) {
+              const std::size_t prev_entry_idx = bloom_filter_entry_indices_[i-1];
+              if (entry.isBetterThan(bloom_filter_entries_[prev_entry_idx])) {
+                bloom_filter_entry_indices_[i-1] = entry_idx;
+                bloom_filter_entry_indices_[i] = prev_entry_idx;
+              }
+            }
+          }
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+ private:
+  /**
+   * @brief A bloom filter together with the attributes probed against it and
+   *        the running probe/miss counters used for reordering.
+   **/
+  struct BloomFilterEntry {
+    BloomFilterEntry(const BloomFilter *in_bloom_filter,
+                     const std::vector<attribute_id> &in_attribute_ids)
+        : bloom_filter(in_bloom_filter),
+          attribute_ids(in_attribute_ids),
+          miss(0),
+          cnt(0) {
+    }
+
+    /**
+     * @brief Whether this entry should be probed before other, i.e. whether
+     *        miss * other.cnt > (cnt + 5) * (other.miss + 5).
+     **/
+    inline bool isBetterThan(const BloomFilterEntry &other) const {
+      // Widen every operand to 64 bits BEFORE adding, so that the "+ 5"
+      // terms cannot wrap around in 32-bit arithmetic.
+      const std::uint64_t lhs =
+          static_cast<std::uint64_t>(miss) * static_cast<std::uint64_t>(other.cnt);
+      const std::uint64_t rhs =
+          (static_cast<std::uint64_t>(cnt) + 5) * (static_cast<std::uint64_t>(other.miss) + 5);
+      return lhs > rhs;
+    }
+
+    const BloomFilter *bloom_filter;
+    // Held by value: a reference here would dangle if the vector passed to
+    // the adapter's constructor did not outlive the adapter.
+    std::vector<attribute_id> attribute_ids;
+    std::uint32_t miss;  // Number of probes of this filter that missed.
+    std::uint32_t cnt;   // Number of times this filter was probed.
+  };
+
+  const std::size_t num_bloom_filters_;
+  std::vector<BloomFilterEntry> bloom_filter_entries_;
+  // Probe order: position -> index into bloom_filter_entries_.
+  std::vector<std::size_t> bloom_filter_entry_indices_;
+
+  DISALLOW_COPY_AND_ASSIGN(BloomFilterAdapter);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/10b25333/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 2d3db8f..133e2f3 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -159,6 +159,7 @@ add_library(quickstep_utility_Alignment ../empty_src.cpp Alignment.hpp)
add_library(quickstep_utility_BitManipulation ../empty_src.cpp BitManipulation.hpp)
add_library(quickstep_utility_BitVector ../empty_src.cpp BitVector.hpp)
add_library(quickstep_utility_BloomFilter ../empty_src.cpp BloomFilter.hpp)
+# BloomFilterAdapter has no .cpp of its own; like the neighboring header-only
+# micro-libs it is declared with the shared ../empty_src.cpp stub.
+add_library(quickstep_utility_BloomFilterAdapter ../empty_src.cpp BloomFilterAdapter.hpp)
add_library(quickstep_utility_BloomFilter_proto
${quickstep_utility_BloomFilter_proto_srcs}
${quickstep_utility_BloomFilter_proto_hdrs})
@@ -166,6 +167,8 @@ add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory.
add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp)
add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp)
+# DAGVisualizer and EventProfiler each have their own implementation file.
+# NOTE(review): the surrounding declarations are alphabetical, which would put
+# EventProfiler after EqualsAnyConstant -- consider reordering.
+add_library(quickstep_utility_DAGVisualizer DAGVisualizer.cpp DAGVisualizer.hpp)
+add_library(quickstep_utility_EventProfiler EventProfiler.cpp EventProfiler.hpp)
add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp)
add_library(quickstep_utility_Glob Glob.cpp Glob.hpp)
add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp)
@@ -216,6 +219,10 @@ target_link_libraries(quickstep_utility_BloomFilter
quickstep_threading_SpinSharedMutex
quickstep_utility_BloomFilter_proto
quickstep_utility_Macros)
+# BloomFilterAdapter.hpp includes "glog/logging.h" (it uses DCHECK_EQ), so
+# glog is listed alongside the project-local dependencies, mirroring the
+# header's includes.
+target_link_libraries(quickstep_utility_BloomFilterAdapter
+                      glog
+                      quickstep_catalog_CatalogTypedefs
+                      quickstep_utility_BloomFilter
+                      quickstep_utility_Macros)
target_link_libraries(quickstep_utility_BloomFilter_proto
${PROTOBUF_LIBRARY})
target_link_libraries(quickstep_utility_CalculateInstalledMemory
@@ -225,6 +232,8 @@ target_link_libraries(quickstep_utility_CheckSnprintf
target_link_libraries(quickstep_utility_DAG
glog
quickstep_utility_Macros)
+# NOTE(review): EventProfiler.hpp/.cpp are not visible in this change set;
+# confirm that this list matches their includes (e.g. whether glog or
+# quickstep_utility_Macros is also needed).
+target_link_libraries(quickstep_utility_EventProfiler
+ quickstep_threading_Mutex)
target_link_libraries(quickstep_utility_Glob
glog)
target_link_libraries(quickstep_utility_MemStream
@@ -243,6 +252,11 @@ target_link_libraries(quickstep_utility_PlanVisualizer
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_utility_Macros
quickstep_utility_StringUtil)
+# NOTE(review): this block sits after quickstep_utility_PlanVisualizer rather
+# than next to quickstep_utility_DAG, where alphabetical order would place
+# it -- consider moving it.
+target_link_libraries(quickstep_utility_DAGVisualizer
+ quickstep_queryoptimizer_QueryPlan
+ quickstep_utility_EventProfiler
+ quickstep_utility_Macros
+ quickstep_utility_StringUtil)
target_link_libraries(quickstep_utility_PtrList
quickstep_utility_Macros)
target_link_libraries(quickstep_utility_PtrMap
@@ -297,11 +311,14 @@ target_link_libraries(quickstep_utility
quickstep_utility_BitManipulation
quickstep_utility_BitVector
quickstep_utility_BloomFilter
+ quickstep_utility_BloomFilterAdapter
quickstep_utility_BloomFilter_proto
quickstep_utility_CalculateInstalledMemory
quickstep_utility_Cast
quickstep_utility_CheckSnprintf
quickstep_utility_DAG
+ quickstep_utility_DAGVisualizer
+ quickstep_utility_EventProfiler
quickstep_utility_EqualsAnyConstant
quickstep_utility_Glob
quickstep_utility_HashPair