You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/02/10 05:33:12 UTC
[1/5] incubator-quickstep git commit: Fixed the lint issue in QueryManagerDistributed. [Forced Update!]
Repository: incubator-quickstep
Updated Branches:
refs/heads/two-level-tmb ecd2adbd1 -> 469c777ab (forced update)
Fixed the lint issue in QueryManagerDistributed.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/960eb357
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/960eb357
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/960eb357
Branch: refs/heads/two-level-tmb
Commit: 960eb3570b4a5d3ddac300c8c083686b35eb46a0
Parents: 167ee87
Author: Zuyu Zhang <zu...@apache.org>
Authored: Thu Feb 9 21:14:46 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 9 21:14:46 2017 -0800
----------------------------------------------------------------------
query_execution/QueryManagerDistributed.cpp | 2 +-
query_execution/QueryManagerDistributed.hpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/960eb357/query_execution/QueryManagerDistributed.cpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryManagerDistributed.cpp b/query_execution/QueryManagerDistributed.cpp
index 174c490..6c6f895 100644
--- a/query_execution/QueryManagerDistributed.cpp
+++ b/query_execution/QueryManagerDistributed.cpp
@@ -53,7 +53,7 @@ namespace quickstep {
QueryManagerDistributed::QueryManagerDistributed(QueryHandle *query_handle,
const tmb::client_id foreman_client_id,
const std::size_t num_shiftbosses,
- tmb::Address &&shiftboss_addresses,
+ tmb::Address &&shiftboss_addresses, // NOLINT(whitespace/operators)
tmb::MessageBus *bus)
: QueryManagerBase(query_handle),
foreman_client_id_(foreman_client_id),
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/960eb357/query_execution/QueryManagerDistributed.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryManagerDistributed.hpp b/query_execution/QueryManagerDistributed.hpp
index 14401a6..3ebc434 100644
--- a/query_execution/QueryManagerDistributed.hpp
+++ b/query_execution/QueryManagerDistributed.hpp
@@ -65,7 +65,7 @@ class QueryManagerDistributed final : public QueryManagerBase {
QueryManagerDistributed(QueryHandle *query_handle,
const tmb::client_id foreman_client_id,
const std::size_t num_shiftbosses,
- tmb::Address &&shiftboss_addresses,
+ tmb::Address &&shiftboss_addresses, // NOLINT(whitespace/operators)
tmb::MessageBus *bus);
~QueryManagerDistributed() override {}
[3/5] incubator-quickstep git commit: Removed an incorrect TODO.
Posted by zu...@apache.org.
Removed an incorrect TODO.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a8e56188
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a8e56188
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a8e56188
Branch: refs/heads/two-level-tmb
Commit: a8e561881389eae8899360af141f5b50c66b3692
Parents: 960eb35
Author: Zuyu Zhang <zu...@apache.org>
Authored: Thu Feb 9 21:31:31 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 9 21:31:47 2017 -0800
----------------------------------------------------------------------
cli/QuickstepCli.cpp | 1 -
1 file changed, 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a8e56188/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index 02ec4ec..eddee8c 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -266,7 +266,6 @@ int main(int argc, char* argv[]) {
worker_client_ids.push_back(workers.back().getBusClientID());
}
- // TODO(zuyu): Move WorkerDirectory within Shiftboss once the latter is added.
WorkerDirectory worker_directory(worker_cpu_affinities.size(),
worker_client_ids,
worker_numa_nodes);
[5/5] incubator-quickstep git commit: Used two TMB implementations in Shiftboss.
Posted by zu...@apache.org.
Used two TMB implementations in Shiftboss.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/469c777a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/469c777a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/469c777a
Branch: refs/heads/two-level-tmb
Commit: 469c777abbe3f90b524008d2247aa404e6ac5736
Parents: 72417f7
Author: Zuyu Zhang <zu...@apache.org>
Authored: Wed Feb 8 12:48:31 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 9 21:32:47 2017 -0800
----------------------------------------------------------------------
cli/distributed/Executor.cpp | 7 +-
cli/distributed/Executor.hpp | 4 +
query_execution/Shiftboss.cpp | 419 +++++++++++--------
query_execution/Shiftboss.hpp | 91 +---
.../DistributedExecutionGeneratorTestRunner.cpp | 9 +-
.../DistributedExecutionGeneratorTestRunner.hpp | 1 +
6 files changed, 279 insertions(+), 252 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/469c777a/cli/distributed/Executor.cpp
----------------------------------------------------------------------
diff --git a/cli/distributed/Executor.cpp b/cli/distributed/Executor.cpp
index 3485298..e248fef 100644
--- a/cli/distributed/Executor.cpp
+++ b/cli/distributed/Executor.cpp
@@ -35,6 +35,7 @@
#include "tmb/id_typedefs.h"
#include "tmb/native_net_client_message_bus.h"
+#include "tmb/pure_memory_message_bus.h"
#include "glog/logging.h"
@@ -47,6 +48,8 @@ using tmb::client_id;
namespace quickstep {
void Executor::init() {
+ bus_local_.Initialize();
+
executor_client_id_ = bus_.Connect();
DLOG(INFO) << "Executor TMB Client ID: " << executor_client_id_;
@@ -59,7 +62,7 @@ void Executor::init() {
for (std::size_t worker_thread_index = 0;
worker_thread_index < FLAGS_num_workers;
++worker_thread_index) {
- workers_.push_back(make_unique<Worker>(worker_thread_index, &bus_));
+ workers_.push_back(make_unique<Worker>(worker_thread_index, &bus_local_));
worker_client_ids.push_back(workers_.back()->getBusClientID());
}
@@ -76,7 +79,7 @@ void Executor::init() {
data_exchanger_.start();
shiftboss_ =
- make_unique<Shiftboss>(&bus_, storage_manager_.get(), worker_directory_.get(), storage_manager_->hdfs());
+ make_unique<Shiftboss>(&bus_, &bus_local_, storage_manager_.get(), worker_directory_.get(), storage_manager_->hdfs());
shiftboss_->start();
for (const auto &worker : workers_) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/469c777a/cli/distributed/Executor.hpp
----------------------------------------------------------------------
diff --git a/cli/distributed/Executor.hpp b/cli/distributed/Executor.hpp
index 6ffa756..aafeeae 100644
--- a/cli/distributed/Executor.hpp
+++ b/cli/distributed/Executor.hpp
@@ -24,6 +24,7 @@
#include <vector>
#include "cli/distributed/Role.hpp"
+#include "query_execution/QueryExecutionTypedefs.hpp"
#include "query_execution/Shiftboss.hpp"
#include "query_execution/Worker.hpp"
#include "query_execution/WorkerDirectory.hpp"
@@ -65,6 +66,9 @@ class Executor final : public Role {
void run() override {}
private:
+ // Used between Shiftboss and Workers.
+ MessageBusImpl bus_local_;
+
tmb::client_id executor_client_id_;
std::vector<std::unique_ptr<Worker>> workers_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/469c777a/query_execution/Shiftboss.cpp
----------------------------------------------------------------------
diff --git a/query_execution/Shiftboss.cpp b/query_execution/Shiftboss.cpp
index bae5205..2f7dc3c 100644
--- a/query_execution/Shiftboss.cpp
+++ b/query_execution/Shiftboss.cpp
@@ -64,6 +64,91 @@ namespace quickstep {
class WorkOrder;
+Shiftboss::Shiftboss(tmb::MessageBus *bus_global,
+ tmb::MessageBus *bus_local,
+ StorageManager *storage_manager,
+ WorkerDirectory *workers,
+ void *hdfs,
+ const int cpu_id)
+ : bus_global_(DCHECK_NOTNULL(bus_global)),
+ bus_local_(DCHECK_NOTNULL(bus_local)),
+ storage_manager_(DCHECK_NOTNULL(storage_manager)),
+ workers_(DCHECK_NOTNULL(workers)),
+ hdfs_(hdfs),
+ cpu_id_(cpu_id),
+ shiftboss_client_id_global_(tmb::kClientIdNone),
+ shiftboss_client_id_local_(tmb::kClientIdNone),
+ foreman_client_id_(tmb::kClientIdNone),
+ max_msgs_per_worker_(1),
+ start_worker_index_(0u) {
+ // Check to have at least one Worker.
+ DCHECK_GT(workers->getNumWorkers(), 0u);
+
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ if (FLAGS_use_hdfs) {
+ CHECK(hdfs_);
+ }
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+
+ shiftboss_client_id_global_ = bus_global_->Connect();
+ LOG(INFO) << "Shiftboss TMB client ID: " << shiftboss_client_id_global_;
+ DCHECK_NE(shiftboss_client_id_global_, tmb::kClientIdNone);
+
+ shiftboss_client_id_local_ = bus_local_->Connect();
+ DCHECK_NE(shiftboss_client_id_local_, tmb::kClientIdNone);
+
+ // Messages between Foreman and Shiftboss.
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kShiftbossRegistrationMessage);
+ bus_global_->RegisterClientAsReceiver(shiftboss_client_id_global_, kShiftbossRegistrationResponseMessage);
+
+ bus_global_->RegisterClientAsReceiver(shiftboss_client_id_global_, kQueryInitiateMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kQueryInitiateResponseMessage);
+
+ bus_global_->RegisterClientAsReceiver(shiftboss_client_id_global_, kInitiateRebuildMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kInitiateRebuildResponseMessage);
+
+ bus_global_->RegisterClientAsReceiver(shiftboss_client_id_global_, kSaveQueryResultMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kSaveQueryResultResponseMessage);
+
+ // Message sent to Worker.
+ bus_local_->RegisterClientAsSender(shiftboss_client_id_local_, kShiftbossRegistrationResponseMessage);
+ bus_local_->RegisterClientAsSender(shiftboss_client_id_local_, kRebuildWorkOrderMessage);
+
+ // Forward the following message types from Foreman to Workers.
+ bus_global_->RegisterClientAsReceiver(shiftboss_client_id_global_, kWorkOrderMessage);
+ bus_local_->RegisterClientAsSender(shiftboss_client_id_local_, kWorkOrderMessage);
+
+ // Forward the following message types from Workers to Foreman.
+ bus_local_->RegisterClientAsReceiver(shiftboss_client_id_local_, kCatalogRelationNewBlockMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kCatalogRelationNewBlockMessage);
+
+ bus_local_->RegisterClientAsReceiver(shiftboss_client_id_local_, kDataPipelineMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kDataPipelineMessage);
+
+ bus_local_->RegisterClientAsReceiver(shiftboss_client_id_local_, kWorkOrderFeedbackMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kWorkOrderFeedbackMessage);
+
+ bus_local_->RegisterClientAsReceiver(shiftboss_client_id_local_, kWorkOrderCompleteMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kWorkOrderCompleteMessage);
+
+ bus_local_->RegisterClientAsReceiver(shiftboss_client_id_local_, kRebuildWorkOrderCompleteMessage);
+ bus_global_->RegisterClientAsSender(shiftboss_client_id_global_, kRebuildWorkOrderCompleteMessage);
+
+ // Clean up query execution states, i.e., QueryContext.
+ bus_global_->RegisterClientAsReceiver(shiftboss_client_id_global_, kQueryTeardownMessage);
+
+ // Stop itself.
+ bus_global_->RegisterClientAsReceiver(shiftboss_client_id_global_, kPoisonMessage);
+ // Stop all workers.
+ bus_local_->RegisterClientAsSender(shiftboss_client_id_local_, kPoisonMessage);
+
+ for (std::size_t i = 0; i < workers_->getNumWorkers(); ++i) {
+ worker_addresses_.AddRecipient(workers_->getClientID(i));
+ }
+
+ registerWithForeman();
+}
+
void Shiftboss::run() {
if (cpu_id_ >= 0) {
// We can pin the shiftboss thread to a CPU if specified.
@@ -73,159 +158,161 @@ void Shiftboss::run() {
processShiftbossRegistrationResponseMessage();
for (;;) {
- // Receive() is a blocking call, causing this thread to sleep until next
- // message is received.
- AnnotatedMessage annotated_message(bus_->Receive(shiftboss_client_id_, 0, true));
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
- << "') received the typed '" << annotated_message.tagged_message.message_type()
- << "' message from client " << annotated_message.sender;
- switch (annotated_message.tagged_message.message_type()) {
- case kQueryInitiateMessage: {
- const TaggedMessage &tagged_message = annotated_message.tagged_message;
-
- serialization::QueryInitiateMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- processQueryInitiateMessage(proto.query_id(), proto.catalog_database_cache(), proto.query_context());
- break;
- }
- case kWorkOrderMessage: {
- const TaggedMessage &tagged_message = annotated_message.tagged_message;
-
- serialization::WorkOrderMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- const std::size_t query_id = proto.query_id();
- DCHECK_EQ(1u, query_contexts_.count(query_id));
-
- WorkOrder *work_order = WorkOrderFactory::ReconstructFromProto(proto.work_order(),
- shiftboss_index_,
- &database_cache_,
- query_contexts_[query_id].get(),
- storage_manager_,
- shiftboss_client_id_,
- bus_,
- hdfs_);
-
- unique_ptr<WorkerMessage> worker_message(
- WorkerMessage::WorkOrderMessage(work_order, proto.operator_index()));
-
- TaggedMessage worker_tagged_message(worker_message.get(),
- sizeof(*worker_message),
- kWorkOrderMessage);
-
- const size_t worker_index = getSchedulableWorker();
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
- << "') forwarded WorkOrderMessage (typed '" << kWorkOrderMessage
- << "') from Foreman to worker " << worker_index;
-
- const MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(bus_,
- shiftboss_client_id_,
- workers_->getClientID(worker_index),
- move(worker_tagged_message));
- CHECK(send_status == MessageBus::SendStatus::kOK);
- break;
- }
- case kInitiateRebuildMessage: {
- // Construct rebuild work orders, and send back their number to
- // 'ForemanDistributed'.
- const TaggedMessage &tagged_message = annotated_message.tagged_message;
-
- serialization::InitiateRebuildMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- processInitiateRebuildMessage(proto.query_id(),
- proto.operator_index(),
- proto.insert_destination_index(),
- proto.relation_id());
- break;
- }
- case kCatalogRelationNewBlockMessage: // Fall through.
- case kDataPipelineMessage:
- case kWorkOrderFeedbackMessage:
- case kWorkOrderCompleteMessage:
- case kRebuildWorkOrderCompleteMessage: {
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
- << "') forwarded typed '" << annotated_message.tagged_message.message_type()
- << "' message from Worker with TMB client ID '" << annotated_message.sender
- << "' to Foreman with TMB client ID " << foreman_client_id_;
-
- DCHECK_NE(foreman_client_id_, tmb::kClientIdNone);
- const MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(bus_,
- shiftboss_client_id_,
- foreman_client_id_,
- move(annotated_message.tagged_message));
- CHECK(send_status == MessageBus::SendStatus::kOK);
- break;
- }
- case kQueryTeardownMessage: {
- const TaggedMessage &tagged_message = annotated_message.tagged_message;
+ AnnotatedMessage annotated_message;
+ if (bus_global_->ReceiveIfAvailable(shiftboss_client_id_global_, &annotated_message, 0, true)) {
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_global_
+ << "') received the typed '" << annotated_message.tagged_message.message_type()
+ << "' message from Foreman " << annotated_message.sender;
+ switch (annotated_message.tagged_message.message_type()) {
+ case kQueryInitiateMessage: {
+ const TaggedMessage &tagged_message = annotated_message.tagged_message;
+
+ serialization::QueryInitiateMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
+
+ processQueryInitiateMessage(proto.query_id(), proto.catalog_database_cache(), proto.query_context());
+ break;
+ }
+ case kWorkOrderMessage: {
+ const TaggedMessage &tagged_message = annotated_message.tagged_message;
+
+ serialization::WorkOrderMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
+
+ const std::size_t query_id = proto.query_id();
+ DCHECK_EQ(1u, query_contexts_.count(query_id));
+
+ unique_ptr<WorkOrder> work_order(
+ WorkOrderFactory::ReconstructFromProto(proto.work_order(), shiftboss_index_, &database_cache_,
+ query_contexts_[query_id].get(), storage_manager_,
+ shiftboss_client_id_local_, bus_local_, hdfs_));
+
+ unique_ptr<WorkerMessage> worker_message(
+ WorkerMessage::WorkOrderMessage(work_order.release(), proto.operator_index()));
+
+ TaggedMessage worker_tagged_message(worker_message.get(),
+ sizeof(*worker_message),
+ kWorkOrderMessage);
+
+ const size_t worker_index = getSchedulableWorker();
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_local_
+ << "') forwarded WorkOrderMessage (typed '" << kWorkOrderMessage
+ << "') from Foreman to worker " << worker_index;
+
+ const MessageBus::SendStatus send_status =
+ QueryExecutionUtil::SendTMBMessage(bus_local_,
+ shiftboss_client_id_local_,
+ workers_->getClientID(worker_index),
+ move(worker_tagged_message));
+ CHECK(send_status == MessageBus::SendStatus::kOK);
+ break;
+ }
+ case kInitiateRebuildMessage: {
+ // Construct rebuild work orders, and send back their number to
+ // 'ForemanDistributed'.
+ const TaggedMessage &tagged_message = annotated_message.tagged_message;
+
+ serialization::InitiateRebuildMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
+
+ processInitiateRebuildMessage(proto.query_id(),
+ proto.operator_index(),
+ proto.insert_destination_index(),
+ proto.relation_id());
+ break;
+ }
+ case kQueryTeardownMessage: {
+ const TaggedMessage &tagged_message = annotated_message.tagged_message;
- serialization::QueryTeardownMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
+ serialization::QueryTeardownMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
- query_contexts_.erase(proto.query_id());
- break;
+ query_contexts_.erase(proto.query_id());
+ break;
+ }
+ case kSaveQueryResultMessage: {
+ const TaggedMessage &tagged_message = annotated_message.tagged_message;
+
+ serialization::SaveQueryResultMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
+
+ for (int i = 0; i < proto.blocks_size(); ++i) {
+ storage_manager_->saveBlockOrBlob(proto.blocks(i));
+ }
+
+ // Clean up query execution states, i.e., QueryContext.
+ query_contexts_.erase(proto.query_id());
+
+ serialization::SaveQueryResultResponseMessage proto_response;
+ proto_response.set_query_id(proto.query_id());
+ proto_response.set_relation_id(proto.relation_id());
+ proto_response.set_cli_id(proto.cli_id());
+ proto_response.set_shiftboss_index(shiftboss_index_);
+
+ const size_t proto_response_length = proto_response.ByteSize();
+ char *proto_response_bytes = static_cast<char*>(malloc(proto_response_length));
+ CHECK(proto_response.SerializeToArray(proto_response_bytes, proto_response_length));
+
+ TaggedMessage message_response(static_cast<const void*>(proto_response_bytes),
+ proto_response_length,
+ kSaveQueryResultResponseMessage);
+ free(proto_response_bytes);
+
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_global_
+ << "') sent SaveQueryResultResponseMessage (typed '" << kSaveQueryResultResponseMessage
+ << "') to Foreman with TMB client ID " << foreman_client_id_;
+ const MessageBus::SendStatus send_status =
+ QueryExecutionUtil::SendTMBMessage(bus_global_,
+ shiftboss_client_id_global_,
+ foreman_client_id_,
+ move(message_response));
+ CHECK(send_status == MessageBus::SendStatus::kOK);
+ break;
+ }
+ case kPoisonMessage: {
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_global_
+ << "') forwarded PoisonMessage (typed '" << kPoisonMessage
+ << "') from Foreman to all workers";
+
+ tmb::MessageStyle broadcast_style;
+ broadcast_style.Broadcast(true);
+
+ const MessageBus::SendStatus send_status =
+ bus_local_->Send(shiftboss_client_id_local_, worker_addresses_, broadcast_style,
+ move(annotated_message.tagged_message));
+ CHECK(send_status == MessageBus::SendStatus::kOK);
+ return;
+ }
+ default: {
+ LOG(FATAL) << "Unknown TMB message type";
+ }
}
- case kSaveQueryResultMessage: {
- const TaggedMessage &tagged_message = annotated_message.tagged_message;
-
- serialization::SaveQueryResultMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
+ }
- for (int i = 0; i < proto.blocks_size(); ++i) {
- storage_manager_->saveBlockOrBlob(proto.blocks(i));
+ while (bus_local_->ReceiveIfAvailable(shiftboss_client_id_local_, &annotated_message, 0, true)) {
+ switch (annotated_message.tagged_message.message_type()) {
+ case kCatalogRelationNewBlockMessage:
+ case kDataPipelineMessage:
+ case kWorkOrderFeedbackMessage:
+ case kWorkOrderCompleteMessage:
+ case kRebuildWorkOrderCompleteMessage: {
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_global_
+ << "') forwarded typed '" << annotated_message.tagged_message.message_type()
+ << "' message from Worker with TMB client ID '" << annotated_message.sender
+ << "' to Foreman with TMB client ID " << foreman_client_id_;
+
+ DCHECK_NE(foreman_client_id_, tmb::kClientIdNone);
+ const MessageBus::SendStatus send_status =
+ QueryExecutionUtil::SendTMBMessage(bus_global_,
+ shiftboss_client_id_global_,
+ foreman_client_id_,
+ move(annotated_message.tagged_message));
+ CHECK(send_status == MessageBus::SendStatus::kOK);
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Unknown TMB message type";
}
-
- // Clean up query execution states, i.e., QueryContext.
- query_contexts_.erase(proto.query_id());
-
- serialization::SaveQueryResultResponseMessage proto_response;
- proto_response.set_query_id(proto.query_id());
- proto_response.set_relation_id(proto.relation_id());
- proto_response.set_cli_id(proto.cli_id());
- proto_response.set_shiftboss_index(shiftboss_index_);
-
- const size_t proto_response_length = proto_response.ByteSize();
- char *proto_response_bytes = static_cast<char*>(malloc(proto_response_length));
- CHECK(proto_response.SerializeToArray(proto_response_bytes, proto_response_length));
-
- TaggedMessage message_response(static_cast<const void*>(proto_response_bytes),
- proto_response_length,
- kSaveQueryResultResponseMessage);
- free(proto_response_bytes);
-
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
- << "') sent SaveQueryResultResponseMessage (typed '" << kSaveQueryResultResponseMessage
- << "') to Foreman with TMB client ID " << foreman_client_id_;
- const MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(bus_,
- shiftboss_client_id_,
- foreman_client_id_,
- move(message_response));
- CHECK(send_status == MessageBus::SendStatus::kOK);
- break;
- }
- case kPoisonMessage: {
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
- << "') forwarded PoisonMessage (typed '" << kPoisonMessage
- << "') from Foreman to all workers";
-
- tmb::MessageStyle broadcast_style;
- broadcast_style.Broadcast(true);
-
- const MessageBus::SendStatus send_status =
- bus_->Send(shiftboss_client_id_,
- worker_addresses_,
- broadcast_style,
- move(annotated_message.tagged_message));
- CHECK(send_status == MessageBus::SendStatus::kOK);
- return;
- }
- default: {
- LOG(FATAL) << "Unknown TMB message type";
}
}
}
@@ -265,21 +352,21 @@ void Shiftboss::registerWithForeman() {
kShiftbossRegistrationMessage);
free(proto_bytes);
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_global_
<< "') sent ShiftbossRegistrationMessage (typed '" << kShiftbossRegistrationMessage
<< "') to all";
tmb::MessageBus::SendStatus send_status =
- bus_->Send(shiftboss_client_id_, all_addresses, style, move(message));
+ bus_global_->Send(shiftboss_client_id_global_, all_addresses, style, move(message));
DCHECK(send_status == tmb::MessageBus::SendStatus::kOK);
}
void Shiftboss::processShiftbossRegistrationResponseMessage() {
- AnnotatedMessage annotated_message(bus_->Receive(shiftboss_client_id_, 0, true));
+ AnnotatedMessage annotated_message(bus_global_->Receive(shiftboss_client_id_global_, 0, true));
const TaggedMessage &tagged_message = annotated_message.tagged_message;
DCHECK_EQ(kShiftbossRegistrationResponseMessage, tagged_message.message_type());
foreman_client_id_ = annotated_message.sender;
- DLOG(INFO) << "Shiftboss (id '" << shiftboss_client_id_
+ DLOG(INFO) << "Shiftboss (id '" << shiftboss_client_id_local_
<< "') received the typed '" << kShiftbossRegistrationResponseMessage
<< "' message from ForemanDistributed with client " << foreman_client_id_;
@@ -290,10 +377,10 @@ void Shiftboss::processShiftbossRegistrationResponseMessage() {
storage_manager_->sendBlockDomainToShiftbossIndexMessage(shiftboss_index_);
// Forward this message to Workers regarding <shiftboss_index_>.
- QueryExecutionUtil::BroadcastMessage(shiftboss_client_id_,
+ QueryExecutionUtil::BroadcastMessage(shiftboss_client_id_local_,
worker_addresses_,
move(annotated_message.tagged_message),
- bus_);
+ bus_local_);
}
void Shiftboss::processQueryInitiateMessage(
@@ -303,7 +390,7 @@ void Shiftboss::processQueryInitiateMessage(
database_cache_.update(catalog_database_cache_proto);
auto query_context = std::make_unique<QueryContext>(
- query_context_proto, database_cache_, storage_manager_, shiftboss_client_id_, bus_);
+ query_context_proto, database_cache_, storage_manager_, shiftboss_client_id_local_, bus_local_);
query_contexts_.emplace(query_id, move(query_context));
serialization::QueryInitiateResponseMessage proto;
@@ -318,12 +405,12 @@ void Shiftboss::processQueryInitiateMessage(
kQueryInitiateResponseMessage);
free(proto_bytes);
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_global_
<< "') sent QueryInitiateResponseMessage (typed '" << kQueryInitiateResponseMessage
<< "') to Foreman with TMB client ID " << foreman_client_id_;
const MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(bus_,
- shiftboss_client_id_,
+ QueryExecutionUtil::SendTMBMessage(bus_global_,
+ shiftboss_client_id_global_,
foreman_client_id_,
move(message_response));
CHECK(send_status == MessageBus::SendStatus::kOK);
@@ -357,12 +444,12 @@ void Shiftboss::processInitiateRebuildMessage(const std::size_t query_id,
kInitiateRebuildResponseMessage);
free(proto_bytes);
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_global_
<< "') sent InitiateRebuildResponseMessage (typed '" << kInitiateRebuildResponseMessage
<< "') to Foreman with TMB client ID " << foreman_client_id_;
const MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(bus_,
- shiftboss_client_id_,
+ QueryExecutionUtil::SendTMBMessage(bus_global_,
+ shiftboss_client_id_global_,
foreman_client_id_,
move(message_response));
CHECK(send_status == MessageBus::SendStatus::kOK);
@@ -375,8 +462,8 @@ void Shiftboss::processInitiateRebuildMessage(const std::size_t query_id,
move(partially_filled_block_refs[i]),
op_index,
rel_id,
- shiftboss_client_id_,
- bus_);
+ shiftboss_client_id_local_,
+ bus_local_);
unique_ptr<WorkerMessage> worker_message(
WorkerMessage::RebuildWorkOrderMessage(rebuild_work_order, op_index));
@@ -386,13 +473,13 @@ void Shiftboss::processInitiateRebuildMessage(const std::size_t query_id,
kRebuildWorkOrderMessage);
const size_t worker_index = getSchedulableWorker();
- DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_
+ DLOG(INFO) << "Shiftboss " << shiftboss_index_ << " (id '" << shiftboss_client_id_local_
<< "') sent RebuildWorkOrderMessage (typed '" << kRebuildWorkOrderMessage
<< "') to worker " << worker_index;
const MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(bus_,
- shiftboss_client_id_,
+ QueryExecutionUtil::SendTMBMessage(bus_local_,
+ shiftboss_client_id_local_,
workers_->getClientID(worker_index),
move(worker_tagged_message));
CHECK(send_status == MessageBus::SendStatus::kOK);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/469c777a/query_execution/Shiftboss.hpp
----------------------------------------------------------------------
diff --git a/query_execution/Shiftboss.hpp b/query_execution/Shiftboss.hpp
index 0750cca..d10a298 100644
--- a/query_execution/Shiftboss.hpp
+++ b/query_execution/Shiftboss.hpp
@@ -63,7 +63,8 @@ class Shiftboss : public Thread {
/**
* @brief Constructor.
*
- * @param bus A pointer to the TMB.
+ * @param bus_global A pointer to the TMB for Foreman.
+ * @param bus_local A pointer to the TMB for Workers.
* @param storage_manager The StorageManager to use.
* @param workers A pointer to the WorkerDirectory.
* @param hdfs The HDFS connector via libhdfs3.
@@ -72,84 +73,12 @@ class Shiftboss : public Thread {
* @note If cpu_id is not specified, Shiftboss thread can be possibly moved
* around on different CPUs by the OS.
**/
- Shiftboss(tmb::MessageBus *bus,
+ Shiftboss(tmb::MessageBus *bus_global,
+ tmb::MessageBus *bus_local,
StorageManager *storage_manager,
WorkerDirectory *workers,
- void *hdfs = nullptr,
- const int cpu_id = -1)
- : bus_(DCHECK_NOTNULL(bus)),
- storage_manager_(DCHECK_NOTNULL(storage_manager)),
- workers_(DCHECK_NOTNULL(workers)),
- hdfs_(hdfs),
- cpu_id_(cpu_id),
- shiftboss_client_id_(tmb::kClientIdNone),
- foreman_client_id_(tmb::kClientIdNone),
- max_msgs_per_worker_(1),
- start_worker_index_(0u) {
- // Check to have at least one Worker.
- DCHECK_GT(workers->getNumWorkers(), 0u);
-
-#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
- if (FLAGS_use_hdfs) {
- CHECK(hdfs_);
- }
-#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
-
- shiftboss_client_id_ = bus_->Connect();
- LOG(INFO) << "Shiftboss TMB client ID: " << shiftboss_client_id_;
- DCHECK_NE(shiftboss_client_id_, tmb::kClientIdNone);
-
- // Messages between Foreman and Shiftboss.
- bus_->RegisterClientAsSender(shiftboss_client_id_, kShiftbossRegistrationMessage);
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kShiftbossRegistrationResponseMessage);
-
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kQueryInitiateMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kQueryInitiateResponseMessage);
-
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kInitiateRebuildMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kInitiateRebuildResponseMessage);
-
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kSaveQueryResultMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kSaveQueryResultResponseMessage);
-
- // Message sent to Worker.
- bus_->RegisterClientAsSender(shiftboss_client_id_, kShiftbossRegistrationResponseMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kRebuildWorkOrderMessage);
-
- // Forward the following message types from Foreman to Workers.
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kWorkOrderMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kWorkOrderMessage);
-
- // Forward the following message types from Workers to Foreman.
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kCatalogRelationNewBlockMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kCatalogRelationNewBlockMessage);
-
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kDataPipelineMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kDataPipelineMessage);
-
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kWorkOrderFeedbackMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kWorkOrderFeedbackMessage);
-
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kWorkOrderCompleteMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kWorkOrderCompleteMessage);
-
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kRebuildWorkOrderCompleteMessage);
- bus_->RegisterClientAsSender(shiftboss_client_id_, kRebuildWorkOrderCompleteMessage);
-
- // Clean up query execution states, i.e., QueryContext.
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kQueryTeardownMessage);
-
- // Stop itself.
- bus_->RegisterClientAsReceiver(shiftboss_client_id_, kPoisonMessage);
- // Stop all workers.
- bus_->RegisterClientAsSender(shiftboss_client_id_, kPoisonMessage);
-
- for (std::size_t i = 0; i < workers_->getNumWorkers(); ++i) {
- worker_addresses_.AddRecipient(workers_->getClientID(i));
- }
-
- registerWithForeman();
- }
+ void *hdfs,
+ const int cpu_id = -1);
~Shiftboss() override {
}
@@ -160,7 +89,7 @@ class Shiftboss : public Thread {
* @return TMB client ID of shiftboss thread.
**/
inline tmb::client_id getBusClientID() const {
- return shiftboss_client_id_;
+ return shiftboss_client_id_global_;
}
/**
@@ -231,9 +160,7 @@ class Shiftboss : public Thread {
const QueryContext::insert_destination_id dest_index,
const relation_id rel_id);
- // TODO(zuyu): Use two buses for the message communication between Foreman and Shiftboss,
- // and Shiftboss and Worker thread pool.
- tmb::MessageBus *bus_;
+ tmb::MessageBus *bus_global_, *bus_local_;
CatalogDatabaseCache database_cache_;
StorageManager *storage_manager_;
@@ -245,7 +172,7 @@ class Shiftboss : public Thread {
// The ID of the CPU that the Shiftboss thread can optionally be pinned to.
const int cpu_id_;
- tmb::client_id shiftboss_client_id_, foreman_client_id_;
+ tmb::client_id shiftboss_client_id_global_, shiftboss_client_id_local_, foreman_client_id_;
// Unique per Shiftboss instance.
std::uint64_t shiftboss_index_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/469c777a/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.cpp b/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.cpp
index 2e18467..51b1ccb 100644
--- a/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.cpp
+++ b/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.cpp
@@ -76,6 +76,7 @@ const char *DistributedExecutionGeneratorTestRunner::kResetOption =
DistributedExecutionGeneratorTestRunner::DistributedExecutionGeneratorTestRunner(const string &storage_path)
: query_id_(0),
+ bus_locals_(kNumInstances),
data_exchangers_(kNumInstances) {
bus_.Initialize();
@@ -113,7 +114,10 @@ DistributedExecutionGeneratorTestRunner::DistributedExecutionGeneratorTestRunner
kAnyNUMANodeID);
for (int i = 0; i < kNumInstances; ++i) {
- workers_.push_back(make_unique<Worker>(0 /* worker_thread_index */, &bus_));
+ tmb::MessageBus *bus_local = &bus_locals_[i];
+ bus_local->Initialize();
+
+ workers_.push_back(make_unique<Worker>(0 /* worker_thread_index */, bus_local));
const vector<tmb::client_id> worker_client_ids(1, workers_.back()->getBusClientID());
worker_directories_.push_back(
@@ -128,7 +132,8 @@ DistributedExecutionGeneratorTestRunner::DistributedExecutionGeneratorTestRunner
data_exchangers_[i].set_storage_manager(storage_manager.get());
shiftbosses_.push_back(
- make_unique<Shiftboss>(&bus_, storage_manager.get(), worker_directories_.back().get()));
+ make_unique<Shiftboss>(&bus_, bus_local, storage_manager.get(), worker_directories_.back().get(),
+ nullptr /* hdfs */));
storage_managers_.push_back(move(storage_manager));
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/469c777a/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.hpp b/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.hpp
index 63e320d..2cd2427 100644
--- a/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.hpp
+++ b/query_optimizer/tests/DistributedExecutionGeneratorTestRunner.hpp
@@ -129,6 +129,7 @@ class DistributedExecutionGeneratorTestRunner : public TextBasedTestRunner {
std::unique_ptr<ForemanDistributed> foreman_;
+ std::vector<MessageBusImpl> bus_locals_;
std::vector<std::unique_ptr<Worker>> workers_;
std::vector<std::unique_ptr<WorkerDirectory>> worker_directories_;
std::vector<DataExchangerAsync> data_exchangers_;
[2/5] incubator-quickstep git commit: Printed out the row number of
the query result in the distributed version.
Posted by zu...@apache.org.
Printed out the row number of the query result in the distributed version.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/fead6f82
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/fead6f82
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/fead6f82
Branch: refs/heads/two-level-tmb
Commit: fead6f8204b59c0037a7a72120f0231289a59d36
Parents: a8e5618
Author: Zuyu Zhang <zu...@apache.org>
Authored: Thu Feb 9 21:31:02 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 9 21:31:47 2017 -0800
----------------------------------------------------------------------
cli/distributed/Cli.cpp | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/fead6f82/cli/distributed/Cli.cpp
----------------------------------------------------------------------
diff --git a/cli/distributed/Cli.cpp b/cli/distributed/Cli.cpp
index 386654d..60b9c8d 100644
--- a/cli/distributed/Cli.cpp
+++ b/cli/distributed/Cli.cpp
@@ -197,6 +197,7 @@ void Cli::run() {
const CatalogRelation result_relation(proto.result_relation());
PrintToScreen::PrintRelation(result_relation, storage_manager_.get(), stdout);
+ PrintToScreen::PrintOutputSize(result_relation, storage_manager_.get(), stdout);
const vector<block_id> blocks(result_relation.getBlocksSnapshot());
for (const block_id block : blocks) {
[4/5] incubator-quickstep git commit: Added HDFS Support For
TextScanWorkOrder.
Posted by zu...@apache.org.
Added HDFS Support For TextScanWorkOrder.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/72417f79
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/72417f79
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/72417f79
Branch: refs/heads/two-level-tmb
Commit: 72417f7965a2d4dd8f235f7b3c30fc2c718f8475
Parents: fead6f8
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Feb 6 14:42:42 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Thu Feb 9 21:32:17 2017 -0800
----------------------------------------------------------------------
cli/distributed/Executor.cpp | 2 +-
query_execution/CMakeLists.txt | 1 +
query_execution/Shiftboss.cpp | 3 +-
query_execution/Shiftboss.hpp | 14 ++++
relational_operators/CMakeLists.txt | 5 ++
relational_operators/TextScanOperator.cpp | 104 ++++++++++++++++++++++---
relational_operators/TextScanOperator.hpp | 10 ++-
relational_operators/WorkOrderFactory.cpp | 6 +-
relational_operators/WorkOrderFactory.hpp | 4 +-
storage/FileManagerHdfs.hpp | 9 +++
storage/StorageManager.cpp | 9 +++
storage/StorageManager.hpp | 8 +-
12 files changed, 155 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/cli/distributed/Executor.cpp
----------------------------------------------------------------------
diff --git a/cli/distributed/Executor.cpp b/cli/distributed/Executor.cpp
index 1d03579..3485298 100644
--- a/cli/distributed/Executor.cpp
+++ b/cli/distributed/Executor.cpp
@@ -76,7 +76,7 @@ void Executor::init() {
data_exchanger_.start();
shiftboss_ =
- make_unique<Shiftboss>(&bus_, storage_manager_.get(), worker_directory_.get());
+ make_unique<Shiftboss>(&bus_, storage_manager_.get(), worker_directory_.get(), storage_manager_->hdfs());
shiftboss_->start();
for (const auto &worker : workers_) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index 5ad6999..3a69f77 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -292,6 +292,7 @@ if (ENABLE_DISTRIBUTED)
quickstep_queryexecution_WorkerMessage
quickstep_relationaloperators_RebuildWorkOrder
quickstep_relationaloperators_WorkOrderFactory
+ quickstep_storage_Flags
quickstep_storage_InsertDestination
quickstep_storage_StorageBlock
quickstep_storage_StorageManager
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/query_execution/Shiftboss.cpp
----------------------------------------------------------------------
diff --git a/query_execution/Shiftboss.cpp b/query_execution/Shiftboss.cpp
index 2ed42d0..bae5205 100644
--- a/query_execution/Shiftboss.cpp
+++ b/query_execution/Shiftboss.cpp
@@ -104,7 +104,8 @@ void Shiftboss::run() {
query_contexts_[query_id].get(),
storage_manager_,
shiftboss_client_id_,
- bus_);
+ bus_,
+ hdfs_);
unique_ptr<WorkerMessage> worker_message(
WorkerMessage::WorkOrderMessage(work_order, proto.operator_index()));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/query_execution/Shiftboss.hpp
----------------------------------------------------------------------
diff --git a/query_execution/Shiftboss.hpp b/query_execution/Shiftboss.hpp
index 6538d48..0750cca 100644
--- a/query_execution/Shiftboss.hpp
+++ b/query_execution/Shiftboss.hpp
@@ -30,6 +30,8 @@
#include "query_execution/QueryContext.hpp"
#include "query_execution/QueryExecutionTypedefs.hpp"
#include "query_execution/WorkerDirectory.hpp"
+#include "storage/Flags.hpp"
+#include "storage/StorageConfig.h" // For QUICKSTEP_HAVE_FILE_MANAGER_HDFS.
#include "threading/Thread.hpp"
#include "utility/Macros.hpp"
@@ -64,6 +66,7 @@ class Shiftboss : public Thread {
* @param bus A pointer to the TMB.
* @param storage_manager The StorageManager to use.
* @param workers A pointer to the WorkerDirectory.
+ * @param hdfs The HDFS connector via libhdfs3.
* @param cpu_id The ID of the CPU to which the Shiftboss thread can be pinned.
*
* @note If cpu_id is not specified, Shiftboss thread can be possibly moved
@@ -72,10 +75,12 @@ class Shiftboss : public Thread {
Shiftboss(tmb::MessageBus *bus,
StorageManager *storage_manager,
WorkerDirectory *workers,
+ void *hdfs = nullptr,
const int cpu_id = -1)
: bus_(DCHECK_NOTNULL(bus)),
storage_manager_(DCHECK_NOTNULL(storage_manager)),
workers_(DCHECK_NOTNULL(workers)),
+ hdfs_(hdfs),
cpu_id_(cpu_id),
shiftboss_client_id_(tmb::kClientIdNone),
foreman_client_id_(tmb::kClientIdNone),
@@ -84,6 +89,12 @@ class Shiftboss : public Thread {
// Check to have at least one Worker.
DCHECK_GT(workers->getNumWorkers(), 0u);
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ if (FLAGS_use_hdfs) {
+ CHECK(hdfs_);
+ }
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+
shiftboss_client_id_ = bus_->Connect();
LOG(INFO) << "Shiftboss TMB client ID: " << shiftboss_client_id_;
DCHECK_NE(shiftboss_client_id_, tmb::kClientIdNone);
@@ -228,6 +239,9 @@ class Shiftboss : public Thread {
StorageManager *storage_manager_;
WorkerDirectory *workers_;
+ // Not owned.
+ void *hdfs_;
+
// The ID of the CPU that the Shiftboss thread can optionally be pinned to.
const int cpu_id_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index 457d58a..1693ec2 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -491,6 +491,7 @@ target_link_libraries(quickstep_relationaloperators_TextScanOperator
quickstep_relationaloperators_RelationalOperator
quickstep_relationaloperators_WorkOrder
quickstep_relationaloperators_WorkOrder_proto
+ quickstep_storage_Flags
quickstep_storage_InsertDestination
quickstep_types_Type
quickstep_types_TypedValue
@@ -500,6 +501,10 @@ target_link_libraries(quickstep_relationaloperators_TextScanOperator
quickstep_utility_Glob
quickstep_utility_Macros
tmb)
+if (QUICKSTEP_HAVE_FILE_MANAGER_HDFS)
+ target_link_libraries(quickstep_relationaloperators_TextScanOperator
+ ${LIBHDFS3_LIBRARIES})
+endif(QUICKSTEP_HAVE_FILE_MANAGER_HDFS)
target_link_libraries(quickstep_relationaloperators_UpdateOperator
glog
quickstep_catalog_CatalogRelation
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/relational_operators/TextScanOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.cpp b/relational_operators/TextScanOperator.cpp
index 0a83a85..a06c55b 100644
--- a/relational_operators/TextScanOperator.cpp
+++ b/relational_operators/TextScanOperator.cpp
@@ -41,7 +41,14 @@
#include "query_execution/WorkOrderProtosContainer.hpp"
#include "query_execution/WorkOrdersContainer.hpp"
#include "relational_operators/WorkOrder.pb.h"
+#include "storage/Flags.hpp"
#include "storage/InsertDestination.hpp"
+#include "storage/StorageConfig.h" // For QUICKSTEP_HAVE_FILE_MANAGER_HDFS.
+
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+#include <hdfs/hdfs.h>
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "types/containers/ColumnVector.hpp"
@@ -205,14 +212,53 @@ void TextScanWorkOrder::execute() {
std::vector<TypedValue> vector_tuple_returned;
constexpr std::size_t kSmallBufferSize = 0x4000;
- char *buffer = reinterpret_cast<char *>(malloc(std::max(text_segment_size_, kSmallBufferSize)));
-
- // Read text segment into buffer.
- FILE *file = std::fopen(filename_.c_str(), "rb");
- std::fseek(file, text_offset_, SEEK_SET);
- std::size_t bytes_read = std::fread(buffer, 1, text_segment_size_, file);
- if (bytes_read != text_segment_size_) {
- throw TextScanReadError(filename_);
+ const size_t buffer_size = std::max(text_segment_size_, kSmallBufferSize);
+ char *buffer = reinterpret_cast<char *>(malloc(buffer_size));
+
+ bool use_hdfs = false;
+ std::size_t bytes_read;
+
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ hdfsFS hdfs = nullptr;
+ hdfsFile file_handle = nullptr;
+
+ if (FLAGS_use_hdfs) {
+ use_hdfs = true;
+ hdfs = static_cast<hdfsFS>(hdfs_);
+
+ file_handle = hdfsOpenFile(hdfs, filename_.c_str(), O_RDONLY, buffer_size,
+ 0 /* default replication */, 0 /* default block size */);
+ if (file_handle == nullptr) {
+ LOG(ERROR) << "Failed to open file " << filename_ << " with error: " << strerror(errno);
+ return;
+ }
+
+ if (hdfsSeek(hdfs, file_handle, text_offset_)) {
+ LOG(ERROR) << "Failed to seek in file " << filename_ << " with error: " << strerror(errno);
+
+ hdfsCloseFile(hdfs, file_handle);
+ return;
+ }
+
+ bytes_read = hdfsRead(hdfs, file_handle, buffer, text_segment_size_);
+ if (bytes_read != text_segment_size_) {
+ hdfsCloseFile(hdfs, file_handle);
+ throw TextScanReadError(filename_);
+ }
+ }
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+
+ FILE *file = nullptr;
+ if (!use_hdfs) {
+ // Read text segment into buffer.
+ file = std::fopen(filename_.c_str(), "rb");
+ std::fseek(file, text_offset_, SEEK_SET);
+ bytes_read = std::fread(buffer, 1, text_segment_size_, file);
+
+ if (bytes_read != text_segment_size_) {
+ std::fclose(file);
+ throw TextScanReadError(filename_);
+ }
}
// Locate the first newline character.
@@ -266,10 +312,36 @@ void TextScanWorkOrder::execute() {
// that the last tuple is very small / very large.
std::size_t dynamic_read_size = 1024;
std::string row_string;
- std::fseek(file, text_offset_ + (end_ptr - buffer), SEEK_SET);
+
+ const size_t dynamic_read_offset = text_offset_ + (end_ptr - buffer);
+ if (use_hdfs) {
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ if (hdfsSeek(hdfs, file_handle, dynamic_read_offset)) {
+ LOG(ERROR) << "Failed to seek in file " << filename_ << " with error: " << strerror(errno);
+
+ hdfsCloseFile(hdfs, file_handle);
+ return;
+ }
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ } else {
+ std::fseek(file, dynamic_read_offset, SEEK_SET);
+ }
+
bool has_reached_end = false;
do {
- bytes_read = std::fread(buffer, 1, dynamic_read_size, file);
+ if (use_hdfs) {
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ bytes_read = hdfsRead(hdfs, file_handle, buffer, dynamic_read_size);
+
+ // Read again when crossing an HDFS block boundary.
+ if (bytes_read != dynamic_read_size) {
+ bytes_read += hdfsRead(hdfs, file_handle, buffer + bytes_read, dynamic_read_size - bytes_read);
+ }
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ } else {
+ bytes_read = std::fread(buffer, 1, dynamic_read_size, file);
+ }
+
std::size_t bytes_to_copy = bytes_read;
for (std::size_t i = 0; i < bytes_read; ++i) {
@@ -303,7 +375,14 @@ void TextScanWorkOrder::execute() {
}
}
- std::fclose(file);
+ if (use_hdfs) {
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ hdfsCloseFile(hdfs, file_handle);
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ } else {
+ std::fclose(file);
+ }
+
free(buffer);
// Store the tuples in a ColumnVectorsValueAccessor for bulk insert.
@@ -334,7 +413,8 @@ void TextScanWorkOrder::execute() {
}
std::vector<TypedValue> TextScanWorkOrder::parseRow(const char **row_ptr,
- const CatalogRelationSchema &relation, bool *is_faulty) const {
+ const CatalogRelationSchema &relation,
+ bool *is_faulty) const {
std::vector<TypedValue> attribute_values;
// Always assume current row is not faulty initially.
*is_faulty = false;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/relational_operators/TextScanOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.hpp b/relational_operators/TextScanOperator.hpp
index eada190..59821fc 100644
--- a/relational_operators/TextScanOperator.hpp
+++ b/relational_operators/TextScanOperator.hpp
@@ -189,6 +189,7 @@ class TextScanWorkOrder : public WorkOrder {
* @param process_escape_sequences Whether to decode escape sequences in the
* text file.
* @param output_destination The InsertDestination to insert tuples.
+ * @param hdfs The HDFS connector via libhdfs3.
**/
TextScanWorkOrder(
const std::size_t query_id,
@@ -197,14 +198,16 @@ class TextScanWorkOrder : public WorkOrder {
const std::size_t text_segment_size,
const char field_terminator,
const bool process_escape_sequences,
- InsertDestination *output_destination)
+ InsertDestination *output_destination,
+ void *hdfs = nullptr)
: WorkOrder(query_id),
filename_(filename),
text_offset_(text_offset),
text_segment_size_(text_segment_size),
field_terminator_(field_terminator),
process_escape_sequences_(process_escape_sequences),
- output_destination_(DCHECK_NOTNULL(output_destination)) {}
+ output_destination_(DCHECK_NOTNULL(output_destination)),
+ hdfs_(hdfs) {}
~TextScanWorkOrder() override {}
@@ -332,6 +335,9 @@ class TextScanWorkOrder : public WorkOrder {
InsertDestination *output_destination_;
+ // Not owned.
+ void *hdfs_;
+
DISALLOW_COPY_AND_ASSIGN(TextScanWorkOrder);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/relational_operators/WorkOrderFactory.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index d2c8251..cf0ee74 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -75,7 +75,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
QueryContext *query_context,
StorageManager *storage_manager,
const tmb::client_id shiftboss_client_id,
- tmb::MessageBus *bus) {
+ tmb::MessageBus *bus,
+ void *hdfs) {
DCHECK(query_context != nullptr);
DCHECK(ProtoIsValid(proto, *catalog_database, *query_context))
<< "Attempted to create WorkOrder from an invalid proto description:\n"
@@ -473,7 +474,8 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
proto.GetExtension(serialization::TextScanWorkOrder::field_terminator),
proto.GetExtension(serialization::TextScanWorkOrder::process_escape_sequences),
query_context->getInsertDestination(
- proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)));
+ proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)),
+ hdfs);
}
case serialization::UPDATE: {
LOG(INFO) << "Creating UpdateWorkOrder in Shiftboss " << shiftboss_index;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/relational_operators/WorkOrderFactory.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.hpp b/relational_operators/WorkOrderFactory.hpp
index acf3855..ece687b 100644
--- a/relational_operators/WorkOrderFactory.hpp
+++ b/relational_operators/WorkOrderFactory.hpp
@@ -59,6 +59,7 @@ class WorkOrderFactory {
* @param storage_manager The StorageManager to use.
* @param shiftboss_client_id The TMB client id of Shiftboss.
* @param bus A pointer to the TMB.
+ * @param hdfs The HDFS connector via libhdfs3.
*
* @return A new WorkOrder reconstructed from the supplied Protocol Buffer.
**/
@@ -68,7 +69,8 @@ class WorkOrderFactory {
QueryContext *query_context,
StorageManager *storage_manager,
const tmb::client_id shiftboss_client_id,
- tmb::MessageBus *bus);
+ tmb::MessageBus *bus,
+ void *hdfs);
/**
* @brief Check whether a serialization::WorkOrder is fully-formed and
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/storage/FileManagerHdfs.hpp
----------------------------------------------------------------------
diff --git a/storage/FileManagerHdfs.hpp b/storage/FileManagerHdfs.hpp
index f47e4a8..a8feb50 100644
--- a/storage/FileManagerHdfs.hpp
+++ b/storage/FileManagerHdfs.hpp
@@ -55,6 +55,15 @@ class FileManagerHdfs : public FileManager {
block_id_counter getMaxUsedBlockCounter(const block_id_domain block_domain) const override;
+ /**
+ * @brief Get the HDFS connector via libhdfs3.
+ *
+ * @return The HDFS connector.
+ **/
+ void* hdfs() {
+ return static_cast<void*>(hdfs_);
+ }
+
private:
// libhdfs3 has an API to release this pointer.
hdfsFS hdfs_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/storage/StorageManager.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageManager.cpp b/storage/StorageManager.cpp
index 6f7d38b..872e8cc 100644
--- a/storage/StorageManager.cpp
+++ b/storage/StorageManager.cpp
@@ -570,6 +570,15 @@ bool StorageManager::DataExchangerClientAsync::Pull(const block_id block,
return true;
}
+void* StorageManager::hdfs() {
+#ifdef QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ if (FLAGS_use_hdfs) {
+ return static_cast<FileManagerHdfs*>(file_manager_.get())->hdfs();
+ }
+#endif // QUICKSTEP_HAVE_FILE_MANAGER_HDFS
+ return nullptr;
+}
+
vector<string> StorageManager::getPeerDomainNetworkAddresses(const block_id block) {
serialization::BlockMessage proto;
proto.set_block_id(block);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/72417f79/storage/StorageManager.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageManager.hpp b/storage/StorageManager.hpp
index 42176ee..dc4b7e8 100644
--- a/storage/StorageManager.hpp
+++ b/storage/StorageManager.hpp
@@ -41,7 +41,6 @@
#include "storage/StorageBlob.hpp"
#include "storage/StorageBlock.hpp"
#include "storage/StorageBlockInfo.hpp"
-#include "storage/StorageConfig.h"
#include "storage/StorageConstants.hpp"
#include "threading/SpinSharedMutex.hpp"
#include "utility/Macros.hpp"
@@ -395,6 +394,13 @@ class StorageManager {
void pullBlockOrBlob(const block_id block, PullResponse *response) const;
#endif
+ /**
+ * @brief Get the HDFS connector via libhdfs3.
+ *
+ * @return The HDFS connector.
+ **/
+ void* hdfs();
+
private:
struct BlockHandle {
void *block_memory;