You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by hb...@apache.org on 2016/06/09 17:37:09 UTC
[01/12] incubator-quickstep git commit: Disabled Clang Temporarily.
[Forced Update!]
Repository: incubator-quickstep
Updated Branches:
refs/heads/query-manager-used-in-foreman a690455ec -> 94a2e1dcb (forced update)
Disabled Clang Temporarily.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/eab1c9a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/eab1c9a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/eab1c9a4
Branch: refs/heads/query-manager-used-in-foreman
Commit: eab1c9a4f80553d500afdf8cb385822b03aaad0d
Parents: fd75e17
Author: Zuyu Zhang <zz...@pivotal.io>
Authored: Wed Jun 8 18:45:37 2016 -0700
Committer: Zuyu Zhang <zz...@pivotal.io>
Committed: Wed Jun 8 20:23:45 2016 -0700
----------------------------------------------------------------------
.travis.yml | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/eab1c9a4/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 483a02b..08d6f38 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ cache: ccache
compiler:
- gcc
- - clang
+ # clang
env:
- BUILD_TYPE=Debug VECTOR_COPY_ELISION_LEVEL=joinwithbinaryexpressions
@@ -21,7 +21,7 @@ env:
- BUILD_TYPE=Release VECTOR_COPY_ELISION_LEVEL=none
install:
- - if [ "$VECTOR_COPY_ELISION_LEVEL" = "joinwithbinaryexpressions" ] && [ "$CC" = "gcc" ] && [ "$BUILD_TYPE" = "Debug" ]; then
+ - if [ "$VECTOR_COPY_ELISION_LEVEL" = "joinwithbinaryexpressions" ] && [ "$CC" = "gcc" ]; then
export MAKE_JOBS=1;
else
export MAKE_JOBS=2;
@@ -77,11 +77,9 @@ addons:
apt:
sources:
- ubuntu-toolchain-r-test
- - llvm-toolchain-precise-3.7
packages:
- gcc-5
- g++-5
- - clang-3.7
- binutils-gold
- libprotobuf-dev
- protobuf-compiler
[08/12] incubator-quickstep git commit: Improve text scan operator
Posted by hb...@apache.org.
Improve text scan operator
Fix a potential segfault with CompressedBlockBuilder
Fix a potential segfault with CompressedBlockBuilder
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/5346e7f7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/5346e7f7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/5346e7f7
Branch: refs/heads/query-manager-used-in-foreman
Commit: 5346e7f7bc9332ed62c0eb0c7471dc814e4597a3
Parents: 096abe2
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Jun 9 03:18:37 2016 -0500
Committer: Jignesh Patel <jm...@hotmail.com>
Committed: Thu Jun 9 10:46:25 2016 -0500
----------------------------------------------------------------------
query_optimizer/ExecutionGenerator.cpp | 1 -
relational_operators/CMakeLists.txt | 23 +-
relational_operators/TextScanOperator.cpp | 818 ++++++-------------
relational_operators/TextScanOperator.hpp | 286 +++----
relational_operators/WorkOrder.proto | 15 +-
relational_operators/WorkOrderFactory.cpp | 72 +-
.../tests/TextScanOperator_unittest.cpp | 1 -
relational_operators/tests/text_scan_input.txt | 8 +-
storage/CompressedBlockBuilder.cpp | 3 +
9 files changed, 387 insertions(+), 840 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 99c2a21..f9fd742 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -945,7 +945,6 @@ void ExecutionGenerator::convertCopyFrom(
physical_plan->file_name(),
physical_plan->column_delimiter(),
physical_plan->escape_strings(),
- FLAGS_parallelize_load,
*output_relation,
insert_destination_index));
insert_destination_proto->set_relational_op_index(scan_operator_index);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index d2693eb..eb73c07 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -1,5 +1,7 @@
# Copyright 2011-2015 Quickstep Technologies LLC.
# Copyright 2015-2016 Pivotal Software, Inc.
+# Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+# University of Wisconsin—Madison.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,9 +18,6 @@
QS_PROTOBUF_GENERATE_CPP(relationaloperators_SortMergeRunOperator_proto_srcs
relationaloperators_SortMergeRunOperator_proto_hdrs
SortMergeRunOperator.proto)
-QS_PROTOBUF_GENERATE_CPP(relationaloperators_TextScanOperator_proto_srcs
- relationaloperators_TextScanOperator_proto_hdrs
- TextScanOperator.proto)
QS_PROTOBUF_GENERATE_CPP(relationaloperators_WorkOrder_proto_srcs
relationaloperators_WorkOrder_proto_hdrs
WorkOrder.proto)
@@ -61,9 +60,6 @@ add_library(quickstep_relationaloperators_SortRunGenerationOperator SortRunGener
SortRunGenerationOperator.hpp)
add_library(quickstep_relationaloperators_TableGeneratorOperator TableGeneratorOperator.cpp TableGeneratorOperator.hpp)
add_library(quickstep_relationaloperators_TextScanOperator TextScanOperator.cpp TextScanOperator.hpp)
-add_library(quickstep_relationaloperators_TextScanOperator_proto
- ${relationaloperators_TextScanOperator_proto_srcs}
- ${relationaloperators_TextScanOperator_proto_hdrs})
add_library(quickstep_relationaloperators_UpdateOperator UpdateOperator.cpp UpdateOperator.hpp)
add_library(quickstep_relationaloperators_WorkOrder ../empty_src.cpp WorkOrder.hpp)
add_library(quickstep_relationaloperators_WorkOrderFactory WorkOrderFactory.cpp WorkOrderFactory.hpp)
@@ -360,27 +356,19 @@ target_link_libraries(quickstep_relationaloperators_TextScanOperator
glog
quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogRelation
- quickstep_catalog_CatalogRelationSchema
quickstep_catalog_CatalogTypedefs
quickstep_queryexecution_QueryContext
- quickstep_queryexecution_QueryExecutionMessages_proto
- quickstep_queryexecution_QueryExecutionTypedefs
- quickstep_queryexecution_QueryExecutionUtil
quickstep_queryexecution_WorkOrdersContainer
quickstep_relationaloperators_RelationalOperator
- quickstep_relationaloperators_TextScanOperator_proto
quickstep_relationaloperators_WorkOrder
quickstep_storage_InsertDestination
- quickstep_storage_StorageBlob
- quickstep_storage_StorageBlockInfo
- quickstep_storage_StorageManager
- quickstep_threading_ThreadIDBasedMap
quickstep_types_Type
quickstep_types_TypedValue
+ quickstep_types_containers_ColumnVector
+ quickstep_types_containers_ColumnVectorsValueAccessor
quickstep_types_containers_Tuple
quickstep_utility_Glob
quickstep_utility_Macros
- quickstep_utility_ThreadSafeQueue
tmb)
target_link_libraries(quickstep_relationaloperators_UpdateOperator
glog
@@ -430,7 +418,6 @@ target_link_libraries(quickstep_relationaloperators_WorkOrderFactory
quickstep_relationaloperators_SortRunGenerationOperator
quickstep_relationaloperators_TableGeneratorOperator
quickstep_relationaloperators_TextScanOperator
- quickstep_relationaloperators_TextScanOperator_proto
quickstep_relationaloperators_UpdateOperator
quickstep_relationaloperators_WorkOrder_proto
quickstep_storage_StorageBlockInfo
@@ -438,7 +425,6 @@ target_link_libraries(quickstep_relationaloperators_WorkOrderFactory
tmb)
target_link_libraries(quickstep_relationaloperators_WorkOrder_proto
quickstep_relationaloperators_SortMergeRunOperator_proto
- quickstep_relationaloperators_TextScanOperator_proto
${PROTOBUF_LIBRARY})
# Module all-in-one library:
@@ -466,7 +452,6 @@ target_link_libraries(quickstep_relationaloperators
quickstep_relationaloperators_SortRunGenerationOperator
quickstep_relationaloperators_TableGeneratorOperator
quickstep_relationaloperators_TextScanOperator
- quickstep_relationaloperators_TextScanOperator_proto
quickstep_relationaloperators_UpdateOperator
quickstep_relationaloperators_WorkOrder
quickstep_relationaloperators_WorkOrderFactory
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/relational_operators/TextScanOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.cpp b/relational_operators/TextScanOperator.cpp
index 5acecbf..d2fd0cd 100644
--- a/relational_operators/TextScanOperator.cpp
+++ b/relational_operators/TextScanOperator.cpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,124 +22,30 @@
#include <algorithm>
#include <cctype>
#include <cstddef>
-#include <cstdint>
#include <cstdio>
#include <cstdlib>
-#include <cstring>
+#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "catalog/CatalogAttribute.hpp"
-#include "catalog/CatalogRelationSchema.hpp"
#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryExecutionMessages.pb.h"
-#include "query_execution/QueryExecutionUtil.hpp"
#include "query_execution/WorkOrdersContainer.hpp"
-#include "relational_operators/TextScanOperator.pb.h"
#include "storage/InsertDestination.hpp"
-#include "storage/StorageBlob.hpp"
-#include "storage/StorageBlockInfo.hpp"
-#include "storage/StorageManager.hpp"
-#include "threading/ThreadIDBasedMap.hpp"
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "types/containers/Tuple.hpp"
+#include "types/containers/ColumnVector.hpp"
+#include "types/containers/ColumnVectorsValueAccessor.hpp"
#include "utility/Glob.hpp"
-#include "gflags/gflags.h"
#include "glog/logging.h"
#include "tmb/id_typedefs.h"
-#include "tmb/message_bus.h"
-#include "tmb/tagged_message.h"
-
-using std::isxdigit;
-using std::size_t;
-using std::sscanf;
-using std::string;
namespace quickstep {
-DEFINE_uint64(textscan_split_blob_size, 2,
- "Size of blobs in number of slots the input text files "
- "are split into in the TextScanOperator.");
-
-// Check if blob size is positive.
-static bool ValidateTextScanSplitBlobSize(const char *flagname,
- std::uint64_t blob_size) {
- if (blob_size == 0) {
- LOG(ERROR) << "--" << flagname << " must be greater than 0";
- return false;
- }
-
- return true;
-}
-
-static const volatile bool text_scan_split_blob_size_dummy = gflags::RegisterFlagValidator(
- &FLAGS_textscan_split_blob_size, &ValidateTextScanSplitBlobSize);
-
-namespace {
-
-// Detect whether '*search_string' contains a row-terminator (either line-feed
-// or carriage-return + line-feed) immediately before 'end_pos'. If
-// 'process_escape_sequences' is true, this function will also eliminate
-// false-positives from an escaped row-terminator. Returns the number of
-// characters in the row-terminator, or 0 if no terminator is detected.
-inline unsigned DetectRowTerminator(const char *search_string,
- std::size_t end_pos,
- const bool process_escape_sequences) {
- if (end_pos == 0) {
- // Empty string.
- return 0;
- }
-
- if (search_string[end_pos - 1] != '\n') {
- // String doesn't end in newline.
- return 0;
- }
-
- if (end_pos == 1) {
- // String is the single newline character.
- return 1;
- }
-
- const bool have_carriage_return = (search_string[end_pos - 2] == '\r');
- if (have_carriage_return && (end_pos == 2)) {
- // String is CR-LF and nothing else.
- return 2;
- }
-
- std::size_t backslashes = 0;
- // Count consecutive backslashes preceding the terminator. If there is an odd
- // number of backslashes, then the terminator is escaped and doesn't count as
- // a real terminator. If there is an even number of backslashes, then each
- // pair is an escaped backslash literal and the terminator still counts.
- if (process_escape_sequences) {
- end_pos = end_pos - 2 - have_carriage_return;
- while (end_pos != 0) {
- if (search_string[end_pos] == '\\') {
- ++backslashes;
- --end_pos;
- if ((end_pos == 0) && (search_string[0] == '\\')) {
- // Don't forget to count a backslash at the very beginning of a string.
- ++backslashes;
- }
- } else {
- break;
- }
- }
- }
-
- if (backslashes & 0x1) {
- return 0;
- } else {
- return 1 + have_carriage_return;
- }
-}
-
-} // namespace
-
bool TextScanOperator::getAllWorkOrders(
WorkOrdersContainer *container,
QueryContext *query_context,
@@ -155,116 +63,50 @@ bool TextScanOperator::getAllWorkOrders(
InsertDestination *output_destination =
query_context->getInsertDestination(output_destination_index_);
- if (parallelize_load_) {
- // Parallel implementation: Split work orders are generated for each file
- // being bulk-loaded. (More than one file can be loaded, because we support
- // glob() semantics in file name.) These work orders read the input file,
- // and split them in the blobs that can be parsed independently.
- if (blocking_dependencies_met_) {
- if (!work_generated_) {
- // First, generate text-split work orders.
- for (const auto &file : files) {
- container->addNormalWorkOrder(
- new TextSplitWorkOrder(query_id_,
- file,
- process_escape_sequences_,
- storage_manager,
- op_index_,
- scheduler_client_id,
- bus),
- op_index_);
- ++num_split_work_orders_;
- }
- work_generated_ = true;
- return false;
- } else {
- // Check if there are blobs to parse.
- while (!text_blob_queue_.empty()) {
- const TextBlob blob_work = text_blob_queue_.popOne();
- container->addNormalWorkOrder(
- new TextScanWorkOrder(query_id_,
- blob_work.blob_id,
- blob_work.size,
- field_terminator_,
- process_escape_sequences_,
- output_destination,
- storage_manager),
- op_index_);
- }
- // Done if all split work orders are completed, and no blobs are left to
- // process.
- return num_done_split_work_orders_.load(std::memory_order_acquire) == num_split_work_orders_ &&
- text_blob_queue_.empty();
- }
- }
- return false;
- } else {
- // Serial implementation.
- if (blocking_dependencies_met_ && !work_generated_) {
- for (const auto &file : files) {
+ // Text segment size set to 256KB.
+ constexpr std::size_t kTextSegmentSize = 0x40000u;
+
+ if (blocking_dependencies_met_ && !work_generated_) {
+ for (const std::string &file : files) {
+ // Use standard C library to retrieve the file size.
+ FILE *fp = std::fopen(file.c_str(), "rb");
+ std::fseek(fp, 0, SEEK_END);
+ const std::size_t file_size = std::ftell(fp);
+ std::fclose(fp);
+
+ std::size_t text_offset = 0;
+ while (text_offset < file_size) {
container->addNormalWorkOrder(
new TextScanWorkOrder(query_id_,
file,
+ text_offset,
+ std::min(kTextSegmentSize, file_size - text_offset),
field_terminator_,
process_escape_sequences_,
output_destination,
storage_manager),
op_index_);
+ text_offset += kTextSegmentSize;
}
- work_generated_ = true;
}
- return work_generated_;
- }
-}
-
-void TextScanOperator::receiveFeedbackMessage(const WorkOrder::FeedbackMessage &msg) {
- switch (msg.type()) {
- case kSplitWorkOrderCompletionMessage: {
- num_done_split_work_orders_.fetch_add(1, std::memory_order_release);
- break;
- }
- case kNewTextBlobMessage: {
- serialization::TextBlob proto;
- CHECK(proto.ParseFromArray(msg.payload(), msg.payload_size()));
- text_blob_queue_.push(TextBlob(proto.blob_id(), proto.size()));
- break;
- }
- default:
- LOG(ERROR) << "Unknown feedback message type for TextScanOperator";
+ work_generated_ = true;
}
+ return work_generated_;
}
TextScanWorkOrder::TextScanWorkOrder(const std::size_t query_id,
const std::string &filename,
+ const std::size_t text_offset,
+ const std::size_t text_segment_size,
const char field_terminator,
const bool process_escape_sequences,
InsertDestination *output_destination,
StorageManager *storage_manager)
: WorkOrder(query_id),
- is_file_(true),
filename_(filename),
+ text_offset_(text_offset),
+ text_segment_size_(text_segment_size),
field_terminator_(field_terminator),
- text_blob_(0),
- text_size_(0),
- process_escape_sequences_(process_escape_sequences),
- output_destination_(output_destination),
- storage_manager_(storage_manager) {
- DCHECK(output_destination_ != nullptr);
- DCHECK(storage_manager_ != nullptr);
-}
-
-TextScanWorkOrder::TextScanWorkOrder(const std::size_t query_id,
- const block_id text_blob,
- const std::size_t text_size,
- const char field_terminator,
- const bool process_escape_sequences,
- InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
- is_file_(false),
- field_terminator_(field_terminator),
- text_blob_(text_blob),
- text_size_(text_size),
process_escape_sequences_(process_escape_sequences),
output_destination_(output_destination),
storage_manager_(storage_manager) {
@@ -274,439 +116,293 @@ TextScanWorkOrder::TextScanWorkOrder(const std::size_t query_id,
void TextScanWorkOrder::execute() {
const CatalogRelationSchema &relation = output_destination_->getRelation();
+ std::vector<Tuple> tuples;
- string current_row_string;
- if (is_file_) {
- FILE *file = std::fopen(filename_.c_str(), "r");
- if (file == nullptr) {
- throw TextScanReadError(filename_);
- }
+ constexpr std::size_t kSmallBufferSize = 0x4000;
+ char *buffer = reinterpret_cast<char *>(malloc(std::max(text_segment_size_, kSmallBufferSize)));
- bool have_row = false;
- do {
- current_row_string.clear();
- have_row = readRowFromFile(file, &current_row_string);
- if (have_row) {
- Tuple tuple = parseRow(current_row_string, relation);
- output_destination_->insertTupleInBatch(tuple);
- }
- } while (have_row);
-
- std::fclose(file);
- } else {
- BlobReference blob = storage_manager_->getBlob(text_blob_);
- const char *blob_pos = static_cast<const char*>(blob->getMemory());
- const char *blob_end = blob_pos + text_size_;
- bool have_row = false;
- do {
- current_row_string.clear();
- have_row = readRowFromBlob(&blob_pos, blob_end, &current_row_string);
- if (have_row) {
- Tuple tuple = parseRow(current_row_string, relation);
- output_destination_->insertTupleInBatch(tuple);
- }
- } while (have_row);
-
- // Drop the consumed blob produced by TextSplitWorkOrder.
- blob.release();
- storage_manager_->deleteBlockOrBlobFile(text_blob_);
+ // Read text segment into buffer.
+ FILE *file = std::fopen(filename_.c_str(), "rb");
+ std::fseek(file, text_offset_, SEEK_SET);
+ std::size_t bytes_read = std::fread(buffer, 1, text_segment_size_, file);
+ if (bytes_read != text_segment_size_) {
+ throw TextScanReadError(filename_);
}
-}
-char TextScanWorkOrder::ParseOctalLiteral(const std::string &row_string,
- std::size_t *start_pos) {
- const std::size_t stop_pos = std::min(row_string.length(), *start_pos + 3);
-
- int value = 0;
- for (; *start_pos < stop_pos; ++*start_pos) {
- int char_value = row_string[*start_pos] - '0';
- if ((char_value >= 0) && (char_value < 8)) {
- value = value * 8 + char_value;
- } else {
- return value;
+ // Locate the first newline character.
+ const char *buffer_end = buffer + text_segment_size_;
+ const char *row_ptr = buffer;
+ if (text_offset_ != 0) {
+ while (row_ptr < buffer_end && *row_ptr != '\n') {
+ ++row_ptr;
}
+ } else {
+ --row_ptr;
}
- return value;
-}
-
-char TextScanWorkOrder::ParseHexLiteral(const std::string &row_string,
- std::size_t *start_pos) {
- const std::size_t stop_pos = std::min(row_string.length(), *start_pos + 2);
+ if (row_ptr >= buffer_end) {
+ // This block does not even contain a newline character.
+ return;
+ }
- int value = 0;
- for (; *start_pos < stop_pos; ++*start_pos) {
- if (!std::isxdigit(row_string[*start_pos])) {
- break;
- }
+ // Locate the last newline character.
+ const char *end_ptr = buffer_end - 1;
+ while (end_ptr > row_ptr && *end_ptr != '\n') {
+ --end_ptr;
+ }
- int char_value;
- if (std::isdigit(row_string[*start_pos])) {
- char_value = row_string[*start_pos] - '0';
- } else if (std::islower(row_string[*start_pos])) {
- char_value = row_string[*start_pos] - 'a' + 10;
+ // Advance both row_ptr and end_ptr by 1.
+ ++row_ptr;
+ ++end_ptr;
+ // Now row_ptr is pointing to the first character RIGHT AFTER the FIRST newline
+ // character in this text segment, and end_ptr is pointing to the first character
+ // RIGHT AFTER the LAST newline character in this text segment.
+
+ // Process the tuples which are between the first newline character and the
+ // last newline character.
+ while (row_ptr < end_ptr) {
+ if (*row_ptr == '\r' || *row_ptr == '\n') {
+ // Skip empty lines.
+ ++row_ptr;
} else {
- char_value = row_string[*start_pos] - 'A' + 10;
+ tuples.emplace_back(parseRow(&row_ptr, relation));
}
-
- value = value * 16 + char_value;
}
- return value;
-}
+ // Process the tuple that is right after the last newline character.
+ // NOTE(jianqiao): dynamic_read_size is trying to balance between the cases
+ // that the last tuple is very small / very large.
+ std::size_t dynamic_read_size = 1024;
+ std::string row_string;
+ std::fseek(file, text_offset_ + (end_ptr - buffer), SEEK_SET);
+ bool has_reached_end = false;
+ do {
+ bytes_read = std::fread(buffer, 1, dynamic_read_size, file);
+ std::size_t bytes_to_copy = bytes_read;
-bool TextScanWorkOrder::readRowFromFile(FILE *file, std::string *row_string) const {
- // Read up to 1023 chars + null-terminator at a time.
- static constexpr std::size_t kRowBufferSize = 1024;
- char row_buffer[kRowBufferSize];
- for (;;) {
- char *read_string = std::fgets(row_buffer, sizeof(row_buffer), file);
- if (read_string == nullptr) {
- if (std::feof(file)) {
- if (row_string->empty()) {
- return false;
- } else {
- throw TextScanFormatError("File ended without delimiter");
- }
- } else {
- throw TextScanReadError(filename_);
+ for (std::size_t i = 0; i < bytes_read; ++i) {
+ if (buffer[i] == '\n') {
+ bytes_to_copy = i + 1;
+ has_reached_end = true;
+ break;
}
}
-
- // Append the contents of the buffer to '*row_string', and see if we've
- // reached a genuine row-terminator yet.
- row_string->append(row_buffer);
- if (removeRowTerminator(row_string)) {
- row_string->push_back(field_terminator_);
- return true;
+ if (!has_reached_end && bytes_read != dynamic_read_size) {
+ has_reached_end = true;
}
- }
-}
-bool TextScanWorkOrder::readRowFromBlob(const char **start_pos,
- const char *end_pos,
- std::string *row_string) const {
- while (*start_pos != end_pos) {
- const char *next_newline = static_cast<const char*>(std::memchr(
- *start_pos,
- '\n',
- end_pos - *start_pos));
-
- if (next_newline == nullptr) {
- throw TextScanFormatError("File ended without delimiter");
- }
+ row_string.append(buffer, bytes_to_copy);
+ dynamic_read_size = std::min(dynamic_read_size * 2, kSmallBufferSize);
+ } while (!has_reached_end);
- // Append the blob's contents through the next newline to '*row_string',
- // and see if we've reached a genuine row-terminator yet.
- row_string->append(*start_pos, next_newline - *start_pos + 1);
- *start_pos = next_newline + 1;
- if (removeRowTerminator(row_string)) {
- row_string->push_back(field_terminator_);
- return true;
+ if (!row_string.empty()) {
+ if (row_string.back() != '\n') {
+ row_string.push_back('\n');
}
+ row_ptr = row_string.c_str();
+ tuples.emplace_back(parseRow(&row_ptr, relation));
}
- if (row_string->empty()) {
- return false;
- } else {
- throw TextScanFormatError("File ended without delimiter");
- }
-}
-
-bool TextScanWorkOrder::removeRowTerminator(std::string *row_string) const {
- unsigned row_term_chars = DetectRowTerminator(row_string->c_str(),
- row_string->length(),
- process_escape_sequences_);
- if (row_term_chars == 0) {
- return false;
- } else {
- row_string->resize(row_string->length() - row_term_chars);
- return true;
- }
-}
-
-bool TextScanWorkOrder::extractFieldString(const std::string &row_string,
- std::size_t *start_pos,
- std::string *field_string) const {
- // Check for NULL literal string.
- if (process_escape_sequences_
- && (row_string.length() - *start_pos >= 3)
- && (row_string[*start_pos] == '\\')
- && (row_string[*start_pos + 1] == 'N')
- && (row_string[*start_pos + 2] == field_terminator_)) {
- *start_pos += 3;
- return false;
- }
-
- // Scan up until terminator, expanding backslashed escape sequences as we go.
- std::size_t terminator_pos = row_string.find(field_terminator_, *start_pos);
- std::size_t scan_pos = *start_pos;
-
- if (process_escape_sequences_) {
- for (;;) {
- std::size_t backslash_pos = row_string.find('\\', scan_pos);
- if ((backslash_pos == std::string::npos) || (backslash_pos >= terminator_pos)) {
- // No more backslashes, or the next backslash is beyond the field
- // terminator.
- break;
- }
-
- // Copy up to the backslash.
- field_string->append(row_string, scan_pos, backslash_pos - scan_pos);
-
- if (backslash_pos + 1 == terminator_pos) {
- // The terminator we found was escaped by a backslash, so append the
- // literal terminator and re-scan for the next terminator character.
- field_string->push_back(field_terminator_);
- scan_pos = terminator_pos + 1;
- terminator_pos = row_string.find(field_terminator_, scan_pos);
- continue;
+ std::fclose(file);
+ free(buffer);
+
+ // Store the tuples in a ColumnVectorsValueAccessor for bulk insert.
+ ColumnVectorsValueAccessor column_vectors;
+ std::size_t attr_id = 0;
+ for (const auto &attribute : relation) {
+ const Type &attr_type = attribute.getType();
+ if (attr_type.isVariableLength()) {
+ std::unique_ptr<IndirectColumnVector> column(
+ new IndirectColumnVector(attr_type, tuples.size()));
+ for (const auto &tuple : tuples) {
+ column->appendTypedValue(tuple.getAttributeValue(attr_id));
}
-
- // Expand escape sequence.
- switch (row_string[backslash_pos + 1]) {
- case '0': // Fallthrough for octal digits.
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- // Octal char literal.
- scan_pos = backslash_pos + 1;
- field_string->push_back(ParseOctalLiteral(row_string, &scan_pos));
- break;
- case 'N': {
- // Null literal after some other column data.
- throw TextScanFormatError(
- "Null indicator '\\N' encountered in text scan mixed in with "
- "other column data.");
- }
- case '\\':
- // Backslash.
- field_string->push_back('\\');
- scan_pos = backslash_pos + 2;
- break;
- case 'b':
- // Backspace.
- field_string->push_back('\b');
- scan_pos = backslash_pos + 2;
- break;
- case 'f':
- // Form-feed.
- field_string->push_back('\f');
- scan_pos = backslash_pos + 2;
- break;
- case 'n':
- // Newline.
- field_string->push_back('\n');
- scan_pos = backslash_pos + 2;
- break;
- case 'r':
- // Carriage return.
- field_string->push_back('\r');
- scan_pos = backslash_pos + 2;
- break;
- case 't':
- // Tab.
- field_string->push_back('\t');
- scan_pos = backslash_pos + 2;
- break;
- case 'v':
- // Vertical tab.
- field_string->push_back('\v');
- scan_pos = backslash_pos + 2;
- break;
- case 'x':
- if ((backslash_pos + 2 < row_string.length()) && std::isxdigit(row_string[backslash_pos + 2])) {
- // Hexidecimal char literal.
- scan_pos = backslash_pos + 2;
- field_string->push_back(ParseHexLiteral(row_string, &scan_pos));
- } else {
- // Just an escaped 'x' with no hex digits.
- field_string->push_back('x');
- scan_pos = backslash_pos + 2;
- }
- break;
- default:
- // Append escaped character as-is.
- field_string->push_back(row_string[backslash_pos + 1]);
- scan_pos = backslash_pos + 2;
- break;
+ column_vectors.addColumn(column.release());
+ } else {
+ std::unique_ptr<NativeColumnVector> column(
+ new NativeColumnVector(attr_type, tuples.size()));
+ for (const auto &tuple : tuples) {
+ column->appendTypedValue(tuple.getAttributeValue(attr_id));
}
+ column_vectors.addColumn(column.release());
}
+ ++attr_id;
}
- DCHECK_NE(terminator_pos, std::string::npos);
- field_string->append(row_string, scan_pos, terminator_pos - scan_pos);
- *start_pos = terminator_pos + 1;
- return true;
+ // Bulk insert the tuples.
+ output_destination_->bulkInsertTuples(&column_vectors);
}
-Tuple TextScanWorkOrder::parseRow(const std::string &row_string, const CatalogRelationSchema &relation) const {
+Tuple TextScanWorkOrder::parseRow(const char **row_ptr,
+ const CatalogRelationSchema &relation) const {
std::vector<TypedValue> attribute_values;
- std::size_t pos = 0;
+ bool is_null_literal;
+ bool has_reached_end_of_line = false;
std::string value_str;
- CatalogRelationSchema::const_iterator attr_it = relation.begin();
- while (pos < row_string.length()) {
- if (attr_it == relation.end()) {
- throw TextScanFormatError("Row has too many fields");
+ for (const auto &attr : relation) {
+ if (has_reached_end_of_line) {
+ throw TextScanFormatError("Row has too few fields");
}
value_str.clear();
- if (extractFieldString(row_string, &pos, &value_str)) {
- attribute_values.emplace_back();
- if (!attr_it->getType().parseValueFromString(value_str, &(attribute_values.back()))) {
- throw TextScanFormatError("Failed to parse value");
- }
- } else {
+ extractFieldString(row_ptr,
+ &is_null_literal,
+ &has_reached_end_of_line,
+ &value_str);
+
+ if (is_null_literal) {
// NULL literal.
- if (!attr_it->getType().isNullable()) {
+ if (!attr.getType().isNullable()) {
throw TextScanFormatError(
"NULL literal '\\N' was specified for a column with a "
"non-nullable Type");
}
-
- attribute_values.emplace_back(attr_it->getType().makeNullValue());
+ attribute_values.emplace_back(attr.getType().makeNullValue());
+ } else {
+ attribute_values.emplace_back();
+ if (!attr.getType().parseValueFromString(value_str, &(attribute_values.back()))) {
+ throw TextScanFormatError("Failed to parse value");
+ }
}
-
- ++attr_it;
}
- if (attr_it != relation.end()) {
- throw TextScanFormatError("Row has too few fields");
+ if (!has_reached_end_of_line) {
+ throw TextScanFormatError("Row has too many fields");
}
return Tuple(std::move(attribute_values));
}
-void TextSplitWorkOrder::execute() {
- std::FILE *file = std::fopen(filename_.c_str(), "r");
- if (!file) {
- throw TextScanReadError(filename_);
- }
-
- bool eof = false;
- do {
- // Allocate new blob, if current is empty.
- if (0 == remainingBlobBytes()) {
- allocateBlob();
- }
-
- // Read the into the unwritten part of blob.
- std::size_t bytes =
- std::fread(writeableBlobAddress(), 1, remainingBlobBytes(), file);
- eof = bytes < remainingBlobBytes();
- written_ += bytes;
-
- // Write the current blob to queue for processing.
- sendBlobInfoToOperator(!eof /* write_row_aligned */);
- } while (!eof);
-
- std::fclose(file);
+void TextScanWorkOrder::extractFieldString(const char **field_ptr,
+ bool *is_null_literal,
+ bool *has_reached_end_of_line,
+ std::string *field_string) const {
+ const char *cur_ptr = *field_ptr;
+ *is_null_literal = false;
- // Notify the operator about the completion of this Work Order.
- FeedbackMessage msg(TextScanOperator::kSplitWorkOrderCompletionMessage,
- operator_index_,
- nullptr /* payload */,
- 0 /* payload_size */,
- false /* ownership */);
- SendFeedbackMessage(bus_, ClientIDMap::Instance()->getValue(), scheduler_client_id_, msg);
-}
+ // Check for NULL literal string.
+ if (process_escape_sequences_ && cur_ptr[0] == '\\' && cur_ptr[1] == 'N') {
+ cur_ptr += 2;
-// Allocate new blob.
-void TextSplitWorkOrder::allocateBlob() {
- text_blob_id_ = storage_manager_->createBlob(FLAGS_textscan_split_blob_size);
- text_blob_ = storage_manager_->getBlobMutable(text_blob_id_);
- blob_size_ = text_blob_->size();
- written_ = 0;
-}
+ // Skip '\r'
+ if (*cur_ptr == '\r') {
+ ++cur_ptr;
+ }
-// Find the last row terminator in the blob.
-std::size_t TextSplitWorkOrder::findLastRowTerminator() {
- std::size_t found = 0;
- const char *blob = static_cast<const char *>(text_blob_->getMemory());
-
- for (std::size_t index = written_;
- index != 0;
- --index) {
- if (DetectRowTerminator(blob, index, process_escape_sequences_)) {
- found = index;
- break;
+ const char c = *cur_ptr;
+ if (c == field_terminator_ || c == '\n') {
+ *is_null_literal = true;
+ *has_reached_end_of_line = (c == '\n');
+ *field_ptr = cur_ptr + 1;
+ return;
}
}
- // TODO(quickstep-team): Design a way to handle long rows that are larger than
- // the configured blob size.
- CHECK_NE(0u, found) << "No row terminator found in " << FLAGS_textscan_split_blob_size
- << "-slot chunk of " << filename_;
- return found;
-}
+ // Not a NULL literal string, rewind cur_ptr to the start position for parsing.
+ cur_ptr = *field_ptr;
-void TextSplitWorkOrder::sendBlobInfoToOperator(const bool write_row_aligned) {
- std::size_t text_len = written_;
- std::string residue;
- if (write_row_aligned) {
- // Find last row terminator in current blob.
- text_len = findLastRowTerminator();
-
- // Copy the residual bytes after the last row terminator.
- residue = std::string(
- static_cast<char *>(text_blob_->getMemoryMutable()) + text_len,
- written_ - text_len);
- }
+ if (!process_escape_sequences_) {
+ // Simply copy until field_terminator or '\n'.
+ for (;; ++cur_ptr) {
+ const char c = *cur_ptr;
+ if (c == field_terminator_) {
+ *has_reached_end_of_line = false;
+ break;
+ } else if (c == '\n') {
+ *has_reached_end_of_line = true;
+ break;
+ }
- // Notify the operator for the split-up blob.
- serialization::TextBlob proto;
- proto.set_blob_id(text_blob_id_);
- proto.set_size(text_len);
-
- const std::size_t payload_size = proto.ByteSize();
- // NOTE(zuyu): 'payload' gets released by FeedbackMessage's destructor.
- char *payload = static_cast<char *>(std::malloc(payload_size));
- CHECK(proto.SerializeToArray(payload, payload_size));
-
- const tmb::client_id worker_thread_client_id = ClientIDMap::Instance()->getValue();
- FeedbackMessage feedback_msg(TextScanOperator::kNewTextBlobMessage,
- operator_index_,
- payload,
- payload_size);
- SendFeedbackMessage(bus_, worker_thread_client_id, scheduler_client_id_, feedback_msg);
-
- // Notify Foreman for the avaiable work order on the blob.
- serialization::WorkOrdersAvailableMessage message_proto;
- message_proto.set_operator_index(operator_index_);
-
- // NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
- const size_t message_proto_length = message_proto.ByteSize();
- char *message_proto_bytes = static_cast<char*>(std::malloc(message_proto_length));
- CHECK(message_proto.SerializeToArray(message_proto_bytes, message_proto_length));
-
- tmb::TaggedMessage tagged_message(static_cast<const void *>(message_proto_bytes),
- message_proto_length,
- kWorkOrdersAvailableMessage);
- std::free(message_proto_bytes);
-
- // Send new work order available message to Foreman.
- const tmb::MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(
- bus_,
- worker_thread_client_id,
- scheduler_client_id_,
- std::move(tagged_message));
- CHECK(send_status == tmb::MessageBus::SendStatus::kOK) << "Message could not "
- "be sent from thread with TMB client ID "
- << worker_thread_client_id << " to Foreman with TMB client "
- "ID " << scheduler_client_id_;
-
- if (residue.size()) {
- // Allocate new blob, and copy residual bytes from last blob.
- allocateBlob();
- std::memcpy(writeableBlobAddress(), residue.data(), residue.size());
- written_ += residue.size();
+ // Ignore '\r'
+ if (c != '\r') {
+ field_string->push_back(c);
+ }
+ }
+ } else {
+ for (;; ++cur_ptr) {
+ const char c = *cur_ptr;
+ if (c == '\\') {
+ ++cur_ptr;
+ const char first_escaped_character = *cur_ptr;
+ switch (first_escaped_character) {
+ case '0': // Fallthrough for octal digits.
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ field_string->push_back(ParseOctalLiteral(&cur_ptr));
+ break;
+ case 'N': {
+ // Null literal after some other column data.
+ throw TextScanFormatError(
+ "Null indicator '\\N' encountered in text scan mixed in with "
+ "other column data.");
+ }
+ case '\\':
+ // Backslash.
+ field_string->push_back('\\');
+ break;
+ case 'b':
+ // Backspace.
+ field_string->push_back('\b');
+ break;
+ case 'f':
+ // Form-feed.
+ field_string->push_back('\f');
+ break;
+ case 'n':
+ // Newline.
+ field_string->push_back('\n');
+ break;
+ case 'r':
+ // Carriage return.
+ field_string->push_back('\r');
+ break;
+ case 't':
+ // Tab.
+ field_string->push_back('\t');
+ break;
+ case 'v':
+ // Vertical tab.
+ field_string->push_back('\v');
+ break;
+ case 'x':
+ if (std::isxdigit(cur_ptr[1])) {
+ // Hexadecimal char literal.
+ ++cur_ptr;
+ field_string->push_back(ParseHexLiteral(&cur_ptr));
+ } else {
+ // Just an escaped 'x' with no hex digits.
+ field_string->push_back('x');
+ }
+ break;
+ case '\n':
+ throw TextScanFormatError(
+ "Backslash line splicing is not supported.");
+ default:
+ // Append escaped character as-is.
+ field_string->push_back(first_escaped_character);
+ break;
+ }
+ } else if (c == field_terminator_) {
+ *has_reached_end_of_line = false;
+ break;
+ } else if (c == '\n') {
+ *has_reached_end_of_line = true;
+ break;
+ } else {
+ if (c != '\r') {
+ // Ignore '\r'
+ field_string->push_back(c);
+ }
+ }
+ }
}
+ *field_ptr = cur_ptr + 1;
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/relational_operators/TextScanOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.hpp b/relational_operators/TextScanOperator.hpp
index 3cda65b..d73e7dd 100644
--- a/relational_operators/TextScanOperator.hpp
+++ b/relational_operators/TextScanOperator.hpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,26 +20,18 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_TEXT_SCAN_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_TEXT_SCAN_OPERATOR_HPP_
-#include <atomic>
+#include <cctype>
#include <cstddef>
-#include <cstdint>
-#include <cstdio>
#include <exception>
#include <string>
#include "catalog/CatalogRelation.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryExecutionTypedefs.hpp"
#include "relational_operators/RelationalOperator.hpp"
#include "relational_operators/WorkOrder.hpp"
-#include "storage/StorageBlob.hpp"
-#include "storage/StorageBlockInfo.hpp"
#include "types/containers/Tuple.hpp"
#include "utility/Macros.hpp"
-#include "utility/ThreadSafeQueue.hpp"
-
-#include "glog/logging.h"
#include "tmb/id_typedefs.h"
@@ -98,26 +92,11 @@ class TextScanFormatError : public std::exception {
};
/**
- * @brief A structure for text data blobs.
- */
-struct TextBlob {
- TextBlob(const block_id text_blob_id, const std::size_t text_size)
- : blob_id(text_blob_id), size(text_size) {}
- block_id blob_id;
- std::size_t size;
-};
-
-/**
* @brief An operator which reads tuples from a text file and inserts them into
* a relation.
**/
class TextScanOperator : public RelationalOperator {
public:
- enum FeedbackMessageType : WorkOrder::FeedbackMessageType {
- kNewTextBlobMessage,
- kSplitWorkOrderCompletionMessage,
- };
-
/**
* @brief Constructor
*
@@ -130,29 +109,22 @@ class TextScanOperator : public RelationalOperator {
* the text file.
* @param process_escape_sequences Whether to decode escape sequences in the
* text file.
- * @param parallelize_load Parallelize the load process by th spliting file
- * into blobs, and generating separate work-orders for each of them.
* @param output_relation The output relation.
* @param output_destination_index The index of the InsertDestination in the
* QueryContext to insert tuples.
**/
- TextScanOperator(
- const std::size_t query_id,
- const std::string &file_pattern,
- const char field_terminator,
- const bool process_escape_sequences,
- const bool parallelize_load,
- const CatalogRelation &output_relation,
- const QueryContext::insert_destination_id output_destination_index)
+ TextScanOperator(const std::size_t query_id,
+ const std::string &file_pattern,
+ const char field_terminator,
+ const bool process_escape_sequences,
+ const CatalogRelation &output_relation,
+ const QueryContext::insert_destination_id output_destination_index)
: RelationalOperator(query_id),
file_pattern_(file_pattern),
field_terminator_(field_terminator),
process_escape_sequences_(process_escape_sequences),
- parallelize_load_(parallelize_load),
output_relation_(output_relation),
output_destination_index_(output_destination_index),
- num_done_split_work_orders_(0),
- num_split_work_orders_(0),
work_generated_(false) {}
~TextScanOperator() override {}
@@ -171,23 +143,14 @@ class TextScanOperator : public RelationalOperator {
return output_relation_.getID();
}
- void receiveFeedbackMessage(const WorkOrder::FeedbackMessage &msg) override;
-
private:
const std::string file_pattern_;
const char field_terminator_;
const bool process_escape_sequences_;
- const bool parallelize_load_;
const CatalogRelation &output_relation_;
const QueryContext::insert_destination_id output_destination_index_;
- ThreadSafeQueue<TextBlob> text_blob_queue_;
- std::atomic<std::uint32_t> num_done_split_work_orders_;
- std::uint32_t num_split_work_orders_;
-
- // Indicates if work order to load file is generated for non-parallel load, and
- // if work order to split file to blobs is generated for parallel load.
bool work_generated_;
DISALLOW_COPY_AND_ASSIGN(TextScanOperator);
@@ -203,7 +166,9 @@ class TextScanWorkOrder : public WorkOrder {
*
* @param query_id The ID of the query to which this WorkOrder belongs.
* @param filename The name of the text file to bulk insert.
- * @param field_terminator The string which separates attribute values in
+ * @param text_offset The start position in the text file to start text scan.
+ * @param text_segment_size The size of text segment to be scanned.
+ * @param field_terminator The character which separates attribute values in
* the text file.
* @param process_escape_sequences Whether to decode escape sequences in the
* text file.
@@ -213,28 +178,8 @@ class TextScanWorkOrder : public WorkOrder {
TextScanWorkOrder(
const std::size_t query_id,
const std::string &filename,
- const char field_terminator,
- const bool process_escape_sequences,
- InsertDestination *output_destination,
- StorageManager *storage_manager);
-
- /**
- * @brief Constructor.
- *
- * @param query_id The ID of the query to which this WorkOrder belongs.
- * @param text_blob Blob ID containing the data to be scanned.
- * @param text_size Size of the data in the blob.
- * @param field_terminator The character which separates attribute values in
- * the text file.
- * @param process_escape_sequences Whether to decode escape sequences in the
- * text file.
- * @param output_destination The InsertDestination to write the read tuples.
- * @param storage_manager The StorageManager to use.
- */
- TextScanWorkOrder(
- const std::size_t query_id,
- const block_id text_blob,
- const std::size_t text_size,
+ const std::size_t text_offset,
+ const std::size_t text_segment_size,
const char field_terminator,
const bool process_escape_sequences,
InsertDestination *output_destination,
@@ -255,141 +200,106 @@ class TextScanWorkOrder : public WorkOrder {
void execute() override;
private:
- // Parse up to three octal digits (0-7) starting at '*start_pos' in
- // 'row_string' as a char literal. '*start_pos' will be modified to
- // the first position AFTER the parsed octal digits.
- static char ParseOctalLiteral(const std::string &row_string,
- std::size_t *start_pos);
-
- // Parse up to two hexadecimal digits (0-F, case insensitive) starting at
- // '*start_pos' in 'row_string' as a char literal. '*start_pos' will be
- // modified to the first position AFTER the parsed hexadecimal digits.
- static char ParseHexLiteral(const std::string &row_string,
- std::size_t *start_pos);
-
- // Read the next text row from the open FILE stream '*file' into
- // '*row_string'. Returns false if end-of-file is reached and there are no
- // more rows, true if a row string was successfully read. For ease of
- // parsing, '*row_string' has the trailing row-terminator removed and
- // replaced with a field-terminator.
- bool readRowFromFile(FILE *file, std::string *row_string) const;
-
- // Read the next text from blob memory starting at '**start_pos' and ending
- // at '*end_pos' into '*row_string'. Returns false if the end of the blob is
- // reached and there are no more rows, true if a row was successfully read.
- // For ease of parsing, '*row_string' has the trailing row-terminator removed
- // and replaced with a field-terminator. After call '*start_pos' points to
- // first character AFTER the read row in the blob.
- bool readRowFromBlob(const char **start_pos,
- const char *end_pos,
- std::string *row_string) const;
-
- // Trim a row-terminator (newline or carriage-return + newline) off the end
- // of '*row_string'. Returns true if the row-terminator was successfully
- // removed, false if '*row_string' did not end in a row-terminator.
- bool removeRowTerminator(std::string *row_string) const;
-
- // Extract a field string starting at '*start_pos' in 'row_string' into
- // '*field_string'. This method also expands escape sequences if
- // 'process_escape_sequences_' is true. Returns true if a field string was
- // successfully extracted, false in the special case where the NULL-literal
- // string "\N" was found. Throws TextScanFormatError if text was malformed.
- bool extractFieldString(const std::string &row_string,
- std::size_t *start_pos,
- std::string *field_string) const;
-
- // Make a tuple by parsing all of the individual fields specified in
- // 'row_string'.
- Tuple parseRow(const std::string &row_string, const CatalogRelationSchema &relation) const;
-
- const bool is_file_;
- const std::string filename_;
- const char field_terminator_;
- const block_id text_blob_;
- const std::size_t text_size_;
- const bool process_escape_sequences_;
-
- InsertDestination *output_destination_;
- StorageManager *storage_manager_;
-
- DISALLOW_COPY_AND_ASSIGN(TextScanWorkOrder);
-};
-
-/**
- * @brief A WorkOrder to split the file into blobs of text that can be processed
- * separately.
- **/
-class TextSplitWorkOrder : public WorkOrder {
- public:
/**
- * @brief Constructor.
+ * @brief Extract a field string starting at \p *field_ptr. This method also
+ * expands escape sequences if \p process_escape_sequences_ is true.
+ * Throws TextScanFormatError if text was malformed.
*
- * @param query_id The ID of the query to which this WorkOrder belongs.
- * @param filename File to split into row-aligned blobs.
- * @param process_escape_sequences Whether to decode escape sequences in the
- * text file.
- * @param storage_manager The StorageManager to use.
- * @param operator_index Operator index of the current operator. This is used
- * to send new-work available message to Foreman.
- * @param scheduler_client_id The TMB client ID of the scheduler thread.
- * @param bus A pointer to the TMB.
+ * @param field_ptr \p *field_ptr points to the current position of the input
+ * char stream for parsing. The overall char stream must end with a
+ * newline character. After the call, \p *field_ptr will be modified to
+ * the start position of the NEXT field string.
+ * @param is_null_literal OUTPUT parameter. Set to true if the NULL-literal
+ * string "\N" was found.
+ * @param has_reached_end_of_line OUTPUT parameter. Set to true if the newline
+ * character was encountered.
+ * @param field_string OUTPUT parameter. Set to the extracted field string.
*/
- TextSplitWorkOrder(const std::size_t query_id,
- const std::string &filename,
- const bool process_escape_sequences,
- StorageManager *storage_manager,
- const std::size_t operator_index,
- const tmb::client_id scheduler_client_id,
- MessageBus *bus)
- : WorkOrder(query_id),
- filename_(filename),
- process_escape_sequences_(process_escape_sequences),
- storage_manager_(DCHECK_NOTNULL(storage_manager)),
- operator_index_(operator_index),
- scheduler_client_id_(scheduler_client_id),
- bus_(DCHECK_NOTNULL(bus)) {}
+ void extractFieldString(const char **field_ptr,
+ bool *is_null_literal,
+ bool *has_reached_end_of_line,
+ std::string *field_string) const;
/**
- * @exception TextScanReadError The text file could not be opened for
- * reading.
+ * @brief Make a tuple by parsing all of the individual fields from a char stream.
+ *
+ * @param row_ptr \p *row_ptr points to the current position of the input char stream
+ * for parsing. The overall char stream must end with a newline character.
+ * After the call, \p *row_ptr will be modified to the start position of
+ * the NEXT text row.
+ * @param relation The relation schema for the tuple.
+ * @return The tuple parsed from the char stream.
*/
- void execute() override;
-
- private:
- // Allocate a new blob.
- void allocateBlob();
-
- // Find the last row terminator in current blob.
- std::size_t findLastRowTerminator();
+ Tuple parseRow(const char **row_ptr,
+ const CatalogRelationSchema &relation) const;
- // Send the blob info to its operator via TMB.
- void sendBlobInfoToOperator(const bool write_row_aligned);
- // Get the writeable address (unwritten chunk) in current blob.
- inline char* writeableBlobAddress() {
- return static_cast<char*>(text_blob_->getMemoryMutable()) + written_;
+ /**
+ * @brief Parse up to three octal digits (0-7) starting at \p *literal_ptr as
+ * a char literal. \p *literal_ptr will be modified to the last position
+ * of the parsed octal digits.
+ *
+ * @param literal_ptr \p *literal_ptr points to the current position of the
+ * input char stream for parsing. The overall char stream must end with
+ * a newline character.
+ * @return The char literal from the parsed octal digits.
+ */
+ inline static char ParseOctalLiteral(const char **literal_ptr) {
+ int value = 0;
+ const char *ptr = *literal_ptr;
+ for (int i = 0; i < 3; ++i, ++ptr) {
+ const int char_value = *ptr - '0';
+ if ((char_value >= 0) && (char_value < 8)) {
+ value = value * 8 + char_value;
+ } else {
+ break;
+ }
+ }
+ *literal_ptr = ptr - 1;
+ return value;
}
- // Number of bytes remaining to be written.
- inline std::size_t remainingBlobBytes() const {
- return blob_size_ - written_;
+ /**
+ * @brief Parse up to two hexadecimal digits (0-F, case insensitive) starting
+ * at \p *literal_ptr as a char literal. \p *literal_ptr will be modified
+ * to the last position of the parsed hexadecimal digits.
+ *
+ * @param literal_ptr \p *literal_ptr points to the current position of the
+ * input char stream for parsing. The overall char stream must end with
+ * a newline character.
+ * @return The char literal from the parsed hexadecimal digits.
+ */
+ inline static char ParseHexLiteral(const char **literal_ptr) {
+ int value = 0;
+ const char *ptr = *literal_ptr;
+ for (int i = 0; i < 2; ++i, ++ptr) {
+ const char c = *ptr;
+ int char_value;
+ if (std::isdigit(c)) {
+ char_value = c - '0';
+ } else if (c >= 'a' && c <= 'f') {
+ char_value = c - 'a' + 10;
+ } else if (c >= 'A' && c <= 'F') {
+ char_value = c - 'A' + 10;
+ } else {
+ break;
+ }
+ value = value * 16 + char_value;
+ }
+ *literal_ptr = ptr - 1;
+ return value;
}
- const std::string filename_; // File to split.
+ const std::string filename_;
+ const std::size_t text_offset_;
+ const std::size_t text_segment_size_;
+ const char field_terminator_;
const bool process_escape_sequences_;
+ InsertDestination *output_destination_;
StorageManager *storage_manager_;
- const std::size_t operator_index_; // Opeartor index.
- const tmb::client_id scheduler_client_id_; // The scheduler's TMB client ID.
- MessageBus *bus_;
-
- MutableBlobReference text_blob_; // Mutable reference to current blob.
- block_id text_blob_id_; // Current blob ID.
- std::size_t written_ = 0; // Bytes written in current blob.
- std::size_t blob_size_ = 0; // Size of the current blob.
-
- DISALLOW_COPY_AND_ASSIGN(TextSplitWorkOrder);
+ DISALLOW_COPY_AND_ASSIGN(TextScanWorkOrder);
};
/** @} */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index fd731f7..60d4c8f 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -1,5 +1,7 @@
// Copyright 2011-2015 Quickstep Technologies LLC.
// Copyright 2015-2016 Pivotal Software, Inc.
+// Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+// University of Wisconsin—Madison.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,7 +20,6 @@ syntax = "proto2";
package quickstep.serialization;
import "relational_operators/SortMergeRunOperator.proto";
-import "relational_operators/TextScanOperator.proto";
enum WorkOrderType {
AGGREGATION = 1;
@@ -39,8 +40,7 @@ enum WorkOrderType {
SORT_RUN_GENERATION = 16;
TABLE_GENERATOR = 17;
TEXT_SCAN = 18;
- TEXT_SPLIT = 19;
- UPDATE = 20;
+ UPDATE = 19;
}
message WorkOrder {
@@ -223,15 +223,12 @@ message TableGeneratorWorkOrder {
message TextScanWorkOrder {
extend WorkOrder {
// All required.
+ optional string filename = 301;
+ optional uint64 text_offset = 302;
+ optional uint64 text_segment_size = 303;
optional uint32 field_terminator = 304; // For one-byte char.
optional bool process_escape_sequences = 305;
optional int32 insert_destination_index = 306;
-
- // Either
- optional string filename = 307;
-
- // Or
- optional TextBlob text_blob = 308;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/relational_operators/WorkOrderFactory.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index 489b666..da42b4d 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -42,7 +42,6 @@
#include "relational_operators/SortRunGenerationOperator.hpp"
#include "relational_operators/TableGeneratorOperator.hpp"
#include "relational_operators/TextScanOperator.hpp"
-#include "relational_operators/TextScanOperator.pb.h"
#include "relational_operators/UpdateOperator.hpp"
#include "relational_operators/WorkOrder.pb.h"
#include "storage/StorageBlockInfo.hpp"
@@ -389,40 +388,16 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
}
case serialization::TEXT_SCAN: {
LOG(INFO) << "Creating TextScanWorkOrder";
- if (proto.HasExtension(serialization::TextScanWorkOrder::filename)) {
- return new TextScanWorkOrder(
- proto.query_id(),
- proto.GetExtension(serialization::TextScanWorkOrder::filename),
- proto.GetExtension(serialization::TextScanWorkOrder::field_terminator),
- proto.GetExtension(serialization::TextScanWorkOrder::process_escape_sequences),
- query_context->getInsertDestination(
- proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)),
- storage_manager);
- } else {
- const serialization::TextBlob &text_blob_proto =
- proto.GetExtension(serialization::TextScanWorkOrder::text_blob);
-
- return new TextScanWorkOrder(
- proto.query_id(),
- text_blob_proto.blob_id(),
- text_blob_proto.size(),
- proto.GetExtension(serialization::TextScanWorkOrder::field_terminator),
- proto.GetExtension(serialization::TextScanWorkOrder::process_escape_sequences),
- query_context->getInsertDestination(
- proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)),
- storage_manager);
- }
- }
- case serialization::TEXT_SPLIT: {
- LOG(INFO) << "Creating TextSplitWorkOrder";
- return new TextSplitWorkOrder(
+ return new TextScanWorkOrder(
proto.query_id(),
- proto.GetExtension(serialization::TextSplitWorkOrder::filename),
- proto.GetExtension(serialization::TextSplitWorkOrder::process_escape_sequences),
- storage_manager,
- proto.GetExtension(serialization::TextSplitWorkOrder::operator_index),
- shiftboss_client_id,
- bus);
+ proto.GetExtension(serialization::TextScanWorkOrder::filename),
+ proto.GetExtension(serialization::TextScanWorkOrder::text_offset),
+ proto.GetExtension(serialization::TextScanWorkOrder::text_segment_size),
+ proto.GetExtension(serialization::TextScanWorkOrder::field_terminator),
+ proto.GetExtension(serialization::TextScanWorkOrder::process_escape_sequences),
+ query_context->getInsertDestination(
+ proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)),
+ storage_manager);
}
case serialization::UPDATE: {
LOG(INFO) << "Creating UpdateWorkOrder";
@@ -691,27 +666,14 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto,
proto.GetExtension(serialization::TableGeneratorWorkOrder::insert_destination_index));
}
case serialization::TEXT_SCAN: {
- if (!proto.HasExtension(serialization::TextScanWorkOrder::field_terminator) ||
- !proto.HasExtension(serialization::TextScanWorkOrder::process_escape_sequences) ||
- !proto.HasExtension(serialization::TextScanWorkOrder::insert_destination_index) ||
- !query_context.isValidInsertDestinationId(
- proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index))) {
- return false;
- }
-
- // Two fields are exclusive.
- if (proto.HasExtension(serialization::TextScanWorkOrder::filename) ==
- proto.HasExtension(serialization::TextScanWorkOrder::text_blob)) {
- return false;
- }
-
- return proto.HasExtension(serialization::TextScanWorkOrder::filename) ||
- proto.GetExtension(serialization::TextScanWorkOrder::text_blob).IsInitialized();
- }
- case serialization::TEXT_SPLIT: {
- return proto.HasExtension(serialization::TextSplitWorkOrder::filename) &&
- proto.HasExtension(serialization::TextSplitWorkOrder::process_escape_sequences) &&
- proto.HasExtension(serialization::TextSplitWorkOrder::operator_index);
+ return proto.HasExtension(serialization::TextScanWorkOrder::filename) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::text_offset) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::text_segment_size) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::field_terminator) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::process_escape_sequences) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::insert_destination_index) &&
+ query_context.isValidInsertDestinationId(
+ proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index));
}
case serialization::UPDATE: {
return proto.HasExtension(serialization::UpdateWorkOrder::relation_id) &&
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/relational_operators/tests/TextScanOperator_unittest.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/tests/TextScanOperator_unittest.cpp b/relational_operators/tests/TextScanOperator_unittest.cpp
index ef6fc2d..5860745 100644
--- a/relational_operators/tests/TextScanOperator_unittest.cpp
+++ b/relational_operators/tests/TextScanOperator_unittest.cpp
@@ -193,7 +193,6 @@ TEST_F(TextScanOperatorTest, ScanTest) {
input_filename,
'\t',
true,
- false,
*relation_,
output_destination_index));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/relational_operators/tests/text_scan_input.txt
----------------------------------------------------------------------
diff --git a/relational_operators/tests/text_scan_input.txt b/relational_operators/tests/text_scan_input.txt
index bcb76bf..51015bd 100644
--- a/relational_operators/tests/text_scan_input.txt
+++ b/relational_operators/tests/text_scan_input.txt
@@ -2,9 +2,5 @@
-1234567890 -1.2e-200 A twenty char string 1969-07-21 02:56:00 00:00:01.001 Another twenty chars
\N \N \N \N \N \N
\N \N \\N \N \N \\N
-\x34\062 \55\064\x32\56\65 \x7B\
-\t\ \\\e\s\c\a\p\e\d\x\b\n\x7d 1988-07-16\T00:00\:00\x2E0\x30\60\06001 00:00:00 'good\' \"bye"\r\n\
-\r\n\v\n\
-
-0 0.0 \\\\\
-\\\\\n 1970-01-01 0 s \\\\
+\x34\062 \55\064\x32\56\65 \x7B\n\t\ \\\e\s\c\a\p\e\d\x\b\n\x7d 1988-07-16\T00:00\:00\x2E0\x30\60\06001 00:00:00 'good\' \"bye"\r\n\n\r\n\v\n\n
+0 0.0 \\\\\n\\\\\n 1970-01-01 0 s \\\\
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/5346e7f7/storage/CompressedBlockBuilder.cpp
----------------------------------------------------------------------
diff --git a/storage/CompressedBlockBuilder.cpp b/storage/CompressedBlockBuilder.cpp
index 4a181eb..1ca0c07 100644
--- a/storage/CompressedBlockBuilder.cpp
+++ b/storage/CompressedBlockBuilder.cpp
@@ -321,6 +321,9 @@ void CompressedBlockBuilder::buildCompressedColumnStoreTupleStorageSubBlock(void
bool CompressedBlockBuilder::addTupleInternal(Tuple *candidate_tuple) {
DEBUG_ASSERT(candidate_tuple->size() == relation_.size());
+ // Ensure that the tuple is the owner of its values.
+ candidate_tuple->ensureLiteral();
+
// Modify dictionaries and maximum integers to reflect the new tuple's
// values. Keep track of what has changed in case a rollback is needed.
vector<CompressionDictionaryBuilder*> modified_dictionaries;
[06/12] incubator-quickstep git commit: Fix a potential segfault with
CompressedBlockBuilder
Posted by hb...@apache.org.
Fix a potential segfault with CompressedBlockBuilder
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/70ef4101
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/70ef4101
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/70ef4101
Branch: refs/heads/query-manager-used-in-foreman
Commit: 70ef41015585505dc015b1f4816dd48dd5ff2b80
Parents: 55b06fa
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Jun 9 00:43:16 2016 -0500
Committer: Jignesh Patel <jm...@hotmail.com>
Committed: Thu Jun 9 08:06:29 2016 -0500
----------------------------------------------------------------------
storage/CompressedBlockBuilder.cpp | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70ef4101/storage/CompressedBlockBuilder.cpp
----------------------------------------------------------------------
diff --git a/storage/CompressedBlockBuilder.cpp b/storage/CompressedBlockBuilder.cpp
index 4a181eb..1ca0c07 100644
--- a/storage/CompressedBlockBuilder.cpp
+++ b/storage/CompressedBlockBuilder.cpp
@@ -321,6 +321,9 @@ void CompressedBlockBuilder::buildCompressedColumnStoreTupleStorageSubBlock(void
bool CompressedBlockBuilder::addTupleInternal(Tuple *candidate_tuple) {
DEBUG_ASSERT(candidate_tuple->size() == relation_.size());
+ // Ensure that the tuple is the owner of its values.
+ candidate_tuple->ensureLiteral();
+
// Modify dictionaries and maximum integers to reflect the new tuple's
// values. Keep track of what has changed in case a rollback is needed.
vector<CompressionDictionaryBuilder*> modified_dictionaries;
[04/12] incubator-quickstep git commit: Fix a potential segfault with
CompressedBlockBuilder
Posted by hb...@apache.org.
Fix a potential segfault with CompressedBlockBuilder
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/e95b312d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/e95b312d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/e95b312d
Branch: refs/heads/query-manager-used-in-foreman
Commit: e95b312d665ea3e6b1353b62f1efca19c303d48e
Parents: 2d39b8e
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Jun 9 00:43:16 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Jun 9 00:43:16 2016 -0500
----------------------------------------------------------------------
storage/CompressedBlockBuilder.cpp | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/e95b312d/storage/CompressedBlockBuilder.cpp
----------------------------------------------------------------------
diff --git a/storage/CompressedBlockBuilder.cpp b/storage/CompressedBlockBuilder.cpp
index 4a181eb..1ca0c07 100644
--- a/storage/CompressedBlockBuilder.cpp
+++ b/storage/CompressedBlockBuilder.cpp
@@ -321,6 +321,9 @@ void CompressedBlockBuilder::buildCompressedColumnStoreTupleStorageSubBlock(void
bool CompressedBlockBuilder::addTupleInternal(Tuple *candidate_tuple) {
DEBUG_ASSERT(candidate_tuple->size() == relation_.size());
+ // Ensure that the tuple is the owner of its values.
+ candidate_tuple->ensureLiteral();
+
// Modify dictionaries and maximum integers to reflect the new tuple's
// values. Keep track of what has changed in case a rollback is needed.
vector<CompressionDictionaryBuilder*> modified_dictionaries;
[09/12] incubator-quickstep git commit: Merge branch 'master' of
https://git-wip-us.apache.org/repos/asf/incubator-quickstep
Posted by hb...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-quickstep
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/816f6f8d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/816f6f8d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/816f6f8d
Branch: refs/heads/query-manager-used-in-foreman
Commit: 816f6f8dd299c92ce186bc0721ea17e1c5f46d69
Parents: 5346e7f 3a843db
Author: Jignesh Patel <jm...@hotmail.com>
Authored: Thu Jun 9 10:48:06 2016 -0500
Committer: Jignesh Patel <jm...@hotmail.com>
Committed: Thu Jun 9 10:48:06 2016 -0500
----------------------------------------------------------------------
----------------------------------------------------------------------
[03/12] incubator-quickstep git commit: Add options to build gflag as
a shared library.
Posted by hb...@apache.org.
Add options to build gflag as a shared library.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/096abe29
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/096abe29
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/096abe29
Branch: refs/heads/query-manager-used-in-foreman
Commit: 096abe29ab8f8510b177168f9e492f59b10e49f5
Parents: 2d39b8e
Author: Navneet Potti <na...@gmail.com>
Authored: Wed Jun 8 16:59:07 2016 -0500
Committer: Zuyu Zhang <zz...@pivotal.io>
Committed: Wed Jun 8 20:30:29 2016 -0700
----------------------------------------------------------------------
CMakeLists.txt | 8 +++++++-
cli/CMakeLists.txt | 8 +++++++-
query_execution/CMakeLists.txt | 8 +++++++-
query_optimizer/CMakeLists.txt | 8 +++++++-
query_optimizer/tests/CMakeLists.txt | 10 ++++++++--
relational_operators/CMakeLists.txt | 20 +++++++++++++-------
storage/CMakeLists.txt | 12 +++++++++---
transaction/CMakeLists.txt | 8 +++++++-
8 files changed, 65 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ef7fd50..2d10a78 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -139,6 +139,12 @@ endif()
option(ENABLE_DISTRIBUTED "Use the distributed version of Quickstep" OFF)
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
# Turn on the QUICKSTEP_DEBUG flag in the source if this is a debug build.
if (CMAKE_MAJOR_VERSION GREATER 2)
cmake_policy(SET CMP0043 NEW)
@@ -700,7 +706,7 @@ add_subdirectory(yarn)
add_executable (quickstep_cli_shell cli/QuickstepCli.cpp)
# Link against direct deps (will transitively pull in everything needed).
target_link_libraries(quickstep_cli_shell
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
quickstep_catalog_CatalogRelation
quickstep_cli_CommandExecutor
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/cli/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/CMakeLists.txt b/cli/CMakeLists.txt
index 8fee7a4..faf5040 100644
--- a/cli/CMakeLists.txt
+++ b/cli/CMakeLists.txt
@@ -30,6 +30,12 @@ if(LIBNUMA_FOUND)
set(QUICKSTEP_HAVE_LIBNUMA TRUE)
endif()
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
configure_file (
"${CMAKE_CURRENT_SOURCE_DIR}/CliConfig.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/CliConfig.h"
@@ -110,7 +116,7 @@ target_link_libraries(quickstep_cli_InputParserUtil
${LIBNUMA_LIBRARY})
endif()
target_link_libraries(quickstep_cli_PrintToScreen
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogRelation
quickstep_storage_StorageBlock
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index 7d9d601..8306f78 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -19,6 +19,12 @@ QS_PROTOBUF_GENERATE_CPP(queryexecution_QueryExecutionMessages_proto_srcs
queryexecution_QueryExecutionMessages_proto_hdrs
QueryExecutionMessages.proto)
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
# Declare micro-libs:
if (ENABLE_DISTRIBUTED)
add_library(quickstep_queryexecution_BlockLocator BlockLocator.cpp BlockLocator.hpp)
@@ -202,7 +208,7 @@ if (ENABLE_DISTRIBUTED)
add_executable(BlockLocator_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/BlockLocator_unittest.cpp")
target_link_libraries(BlockLocator_unittest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogAttribute
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 5c9438d..8f08130 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -24,6 +24,12 @@ configure_file (
"${CMAKE_CURRENT_BINARY_DIR}/QueryOptimizerConfig.h"
)
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
add_subdirectory(cost_model)
add_subdirectory(expressions)
add_subdirectory(logical)
@@ -182,7 +188,7 @@ target_link_libraries(quickstep_queryoptimizer_OptimizerTree
quickstep_utility_Macros
quickstep_utility_TreeStringSerializable)
target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
quickstep_queryoptimizer_LogicalToPhysicalMapper
quickstep_queryoptimizer_logical_Logical
quickstep_queryoptimizer_physical_Physical
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/query_optimizer/tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/CMakeLists.txt b/query_optimizer/tests/CMakeLists.txt
index 07af404..6ef2a03 100644
--- a/query_optimizer/tests/CMakeLists.txt
+++ b/query_optimizer/tests/CMakeLists.txt
@@ -18,6 +18,12 @@ add_subdirectory(logical_generator)
add_subdirectory(physical_generator)
add_subdirectory(resolver)
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
add_library(quickstep_queryoptimizer_tests_OptimizerTest OptimizerTest.cpp OptimizerTest.hpp)
add_library(quickstep_queryoptimizer_tests_TestDatabaseLoader TestDatabaseLoader.cpp TestDatabaseLoader.hpp)
@@ -102,7 +108,7 @@ add_executable(quickstep_queryoptimizer_tests_OptimizerTextTest
"${PROJECT_SOURCE_DIR}/utility/textbased_test/TextBasedTest.hpp")
target_link_libraries(quickstep_queryoptimizer_tests_ExecutionGeneratorTest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogDatabase
@@ -132,7 +138,7 @@ target_link_libraries(quickstep_queryoptimizer_tests_ExecutionGeneratorTest
tmb
${LIBS})
target_link_libraries(quickstep_queryoptimizer_tests_OptimizerTextTest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
gtest_main
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index eec5300..d2693eb 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -23,6 +23,12 @@ QS_PROTOBUF_GENERATE_CPP(relationaloperators_WorkOrder_proto_srcs
relationaloperators_WorkOrder_proto_hdrs
WorkOrder.proto)
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
# Declare micro-libs:
add_library(quickstep_relationaloperators_AggregationOperator AggregationOperator.cpp AggregationOperator.hpp)
add_library(quickstep_relationaloperators_BuildHashOperator BuildHashOperator.cpp BuildHashOperator.hpp)
@@ -160,7 +166,7 @@ target_link_libraries(quickstep_relationaloperators_FinalizeAggregationOperator
quickstep_utility_Macros
tmb)
target_link_libraries(quickstep_relationaloperators_HashJoinOperator
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
quickstep_catalog_CatalogRelation
quickstep_catalog_CatalogRelationSchema
@@ -350,7 +356,7 @@ target_link_libraries(quickstep_relationaloperators_TableGeneratorOperator
quickstep_utility_Macros
tmb)
target_link_libraries(quickstep_relationaloperators_TextScanOperator
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogRelation
@@ -470,7 +476,7 @@ target_link_libraries(quickstep_relationaloperators
add_executable(AggregationOperator_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/AggregationOperator_unittest.cpp")
target_link_libraries(AggregationOperator_unittest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogAttribute
@@ -523,7 +529,7 @@ add_test(AggregationOperator_unittest AggregationOperator_unittest)
add_executable(HashJoinOperator_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/HashJoinOperator_unittest.cpp")
target_link_libraries(HashJoinOperator_unittest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogAttribute
@@ -573,7 +579,7 @@ add_test(HashJoinOperator_unittest HashJoinOperator_unittest)
add_executable(SortMergeRunOperator_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/SortMergeRunOperator_unittest.cpp")
target_link_libraries(SortMergeRunOperator_unittest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogAttribute
@@ -624,7 +630,7 @@ add_test(SortMergeRunOperator_unittest SortMergeRunOperator_unittest)
add_executable(SortRunGenerationOperator_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/SortRunGenerationOperator_unittest.cpp")
target_link_libraries(SortRunGenerationOperator_unittest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogAttribute
@@ -672,7 +678,7 @@ add_test(SortRunGenerationOperator_unittest SortRunGenerationOperator_unittest)
add_executable(TextScanOperator_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/TextScanOperator_unittest.cpp")
target_link_libraries(TextScanOperator_unittest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogAttribute
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index a77976a..b536411 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -21,6 +21,12 @@ if (REBUILD_INDEX_ON_UPDATE_OVERFLOW)
set(QUICKSTEP_REBUILD_INDEX_ON_UPDATE_OVERFLOW TRUE)
endif()
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
include(CheckIncludeFileCXX)
check_include_files("fcntl.h;glob.h;unistd.h;sys/stat.h;sys/types.h" QUICKSTEP_HAVE_FILE_MANAGER_POSIX)
if (NOT QUICKSTEP_HAVE_FILE_MANAGER_POSIX)
@@ -617,7 +623,7 @@ target_link_libraries(quickstep_storage_FileManager
if (QUICKSTEP_HAVE_FILE_MANAGER_HDFS)
target_link_libraries(quickstep_storage_FileManagerHdfs
glog
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
quickstep_storage_FileManager
quickstep_storage_StorageBlockInfo
quickstep_storage_StorageConstants
@@ -950,7 +956,7 @@ target_link_libraries(quickstep_storage_StorageBlockLayout
target_link_libraries(quickstep_storage_StorageBlockLayout_proto
${PROTOBUF_LIBRARY})
target_link_libraries(quickstep_storage_StorageManager
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogTypedefs
@@ -1380,7 +1386,7 @@ if (ENABLE_DISTRIBUTED)
add_executable(DataExchange_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/DataExchange_unittest.cpp")
target_link_libraries(DataExchange_unittest
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
gtest
quickstep_catalog_CatalogAttribute
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/096abe29/transaction/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/transaction/CMakeLists.txt b/transaction/CMakeLists.txt
index c6c87b6..430b4bc 100644
--- a/transaction/CMakeLists.txt
+++ b/transaction/CMakeLists.txt
@@ -13,6 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if (BUILD_SHARED_LIBS)
+ set(GFLAGS_LIB_NAME gflags_nothreads-shared)
+else()
+ set(GFLAGS_LIB_NAME gflags_nothreads-static)
+endif()
+
add_library(quickstep_transaction_AccessMode
AccessMode.cpp
AccessMode.hpp)
@@ -68,7 +74,7 @@ target_link_libraries(quickstep_transaction_Lock
quickstep_transaction_AccessMode
quickstep_transaction_ResourceId)
target_link_libraries(quickstep_transaction_LockManager
- gflags_nothreads-static
+ ${GFLAGS_LIB_NAME}
glog
quickstep_utility_ThreadSafeQueue
quickstep_threading_Thread
[07/12] incubator-quickstep git commit: Merge branch 'master' of
https://git-wip-us.apache.org/repos/asf/incubator-quickstep
Posted by hb...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-quickstep
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/3a843dbc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/3a843dbc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/3a843dbc
Branch: refs/heads/query-manager-used-in-foreman
Commit: 3a843dbc2157646b90c63b3784be5208e4906d72
Parents: 70ef410 e95b312
Author: Jignesh Patel <jm...@hotmail.com>
Authored: Thu Jun 9 08:07:08 2016 -0500
Committer: Jignesh Patel <jm...@hotmail.com>
Committed: Thu Jun 9 08:07:08 2016 -0500
----------------------------------------------------------------------
----------------------------------------------------------------------
[10/12] incubator-quickstep git commit: Long lived Foreman thread
Posted by hb...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/tests/TextScanOperator_unittest.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/tests/TextScanOperator_unittest.cpp b/relational_operators/tests/TextScanOperator_unittest.cpp
index 5860745..8e30286 100644
--- a/relational_operators/tests/TextScanOperator_unittest.cpp
+++ b/relational_operators/tests/TextScanOperator_unittest.cpp
@@ -180,9 +180,11 @@ TEST_F(TextScanOperatorTest, ScanTest) {
// Setup the InsertDestination proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
QueryContext::insert_destination_id output_destination_index = query_context_proto.insert_destinations_size();
serialization::InsertDestination *output_destination_proto = query_context_proto.add_insert_destinations();
+ output_destination_proto->set_query_id(query_context_proto.query_id());
output_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
output_destination_proto->set_relation_id(relation_->getID());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/storage/InsertDestination.cpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp
index 354bed4..5e4dd28 100644
--- a/storage/InsertDestination.cpp
+++ b/storage/InsertDestination.cpp
@@ -60,6 +60,7 @@ InsertDestination::InsertDestination(const CatalogRelationSchema &relation,
const StorageBlockLayout *layout,
StorageManager *storage_manager,
const std::size_t relational_op_index,
+ const std::size_t query_id,
const tmb::client_id scheduler_client_id,
tmb::MessageBus *bus)
: thread_id_map_(*ClientIDMap::Instance()),
@@ -67,6 +68,7 @@ InsertDestination::InsertDestination(const CatalogRelationSchema &relation,
relation_(relation),
layout_(layout),
relational_op_index_(relational_op_index),
+ query_id_(query_id),
scheduler_client_id_(scheduler_client_id),
bus_(DCHECK_NOTNULL(bus)) {
if (layout_ == nullptr) {
@@ -74,11 +76,12 @@ InsertDestination::InsertDestination(const CatalogRelationSchema &relation,
}
}
-InsertDestination* InsertDestination::ReconstructFromProto(const serialization::InsertDestination &proto,
- const CatalogRelationSchema &relation,
- StorageManager *storage_manager,
- const tmb::client_id scheduler_client_id,
- tmb::MessageBus *bus) {
+InsertDestination* InsertDestination::ReconstructFromProto(
+ const serialization::InsertDestination &proto,
+ const CatalogRelationSchema &relation,
+ StorageManager *storage_manager,
+ const tmb::client_id scheduler_client_id,
+ tmb::MessageBus *bus) {
DCHECK(ProtoIsValid(proto, relation));
StorageBlockLayout *layout = nullptr;
@@ -93,6 +96,7 @@ InsertDestination* InsertDestination::ReconstructFromProto(const serialization::
layout,
storage_manager,
proto.relational_op_index(),
+ proto.query_id(),
scheduler_client_id,
bus);
}
@@ -107,6 +111,7 @@ InsertDestination* InsertDestination::ReconstructFromProto(const serialization::
storage_manager,
move(blocks),
proto.relational_op_index(),
+ proto.query_id(),
scheduler_client_id,
bus);
}
@@ -134,6 +139,7 @@ InsertDestination* InsertDestination::ReconstructFromProto(const serialization::
storage_manager,
move(partitions),
proto.relational_op_index(),
+ proto.query_id(),
scheduler_client_id,
bus);
}
@@ -262,6 +268,7 @@ MutableBlockReference AlwaysCreateBlockInsertDestination::createNewBlock() {
serialization::CatalogRelationNewBlockMessage proto;
proto.set_relation_id(relation_.getID());
proto.set_block_id(new_id);
+ proto.set_query_id(getQueryID());
const size_t proto_length = proto.ByteSize();
char *proto_bytes = static_cast<char*>(malloc(proto_length));
@@ -309,6 +316,7 @@ MutableBlockReference BlockPoolInsertDestination::createNewBlock() {
serialization::CatalogRelationNewBlockMessage proto;
proto.set_relation_id(relation_.getID());
proto.set_block_id(new_id);
+ proto.set_query_id(getQueryID());
const size_t proto_length = proto.ByteSize();
char *proto_bytes = static_cast<char*>(malloc(proto_length));
@@ -385,21 +393,29 @@ const std::vector<block_id>& BlockPoolInsertDestination::getTouchedBlocksInterna
return done_block_ids_;
}
-PartitionAwareInsertDestination::PartitionAwareInsertDestination(PartitionSchemeHeader *partition_scheme_header,
- const CatalogRelationSchema &relation,
- const StorageBlockLayout *layout,
- StorageManager *storage_manager,
- vector<vector<block_id>> &&partitions,
- const std::size_t relational_op_index,
- const tmb::client_id scheduler_client_id,
- tmb::MessageBus *bus)
- : InsertDestination(relation, layout, storage_manager, relational_op_index, scheduler_client_id, bus),
+PartitionAwareInsertDestination::PartitionAwareInsertDestination(
+ PartitionSchemeHeader *partition_scheme_header,
+ const CatalogRelationSchema &relation,
+ const StorageBlockLayout *layout,
+ StorageManager *storage_manager,
+ vector<vector<block_id>> &&partitions,
+ const std::size_t relational_op_index,
+ const std::size_t query_id,
+ const tmb::client_id scheduler_client_id,
+ tmb::MessageBus *bus)
+ : InsertDestination(relation,
+ layout,
+ storage_manager,
+ relational_op_index,
+ query_id,
+ scheduler_client_id,
+ bus),
partition_scheme_header_(DCHECK_NOTNULL(partition_scheme_header)),
available_block_refs_(partition_scheme_header_->getNumPartitions()),
available_block_ids_(move(partitions)),
done_block_ids_(partition_scheme_header_->getNumPartitions()),
- mutexes_for_partition_(new SpinMutex[partition_scheme_header_->getNumPartitions()]) {
-}
+ mutexes_for_partition_(
+ new SpinMutex[partition_scheme_header_->getNumPartitions()]) {}
MutableBlockReference PartitionAwareInsertDestination::createNewBlock() {
FATAL_ERROR("PartitionAwareInsertDestination::createNewBlock needs a partition id as an argument.");
@@ -415,6 +431,7 @@ MutableBlockReference PartitionAwareInsertDestination::createNewBlockInPartition
proto.set_relation_id(relation_.getID());
proto.set_block_id(new_id);
proto.set_partition_id(part_id);
+ proto.set_query_id(getQueryID());
const size_t proto_length = proto.ByteSize();
char *proto_bytes = static_cast<char*>(malloc(proto_length));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/storage/InsertDestination.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp
index 670cd6c..6968149 100644
--- a/storage/InsertDestination.hpp
+++ b/storage/InsertDestination.hpp
@@ -78,6 +78,7 @@ class InsertDestination : public InsertDestinationInterface {
* @param storage_manager The StorageManager to use.
* @param relational_op_index The index of the relational operator in the
* QueryPlan DAG that has outputs.
+ * @param query_id The ID of this query.
* @param scheduler_client_id The TMB client ID of the scheduler thread.
* @param bus A pointer to the TMB.
**/
@@ -85,6 +86,7 @@ class InsertDestination : public InsertDestinationInterface {
const StorageBlockLayout *layout,
StorageManager *storage_manager,
const std::size_t relational_op_index,
+ const std::size_t query_id,
const tmb::client_id scheduler_client_id,
tmb::MessageBus *bus);
@@ -211,6 +213,7 @@ class InsertDestination : public InsertDestinationInterface {
proto.set_operator_index(relational_op_index_);
proto.set_block_id(id);
proto.set_relation_id(relation_.getID());
+ proto.set_query_id(query_id_);
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const std::size_t proto_length = proto.ByteSize();
@@ -253,6 +256,10 @@ class InsertDestination : public InsertDestinationInterface {
" ID " << scheduler_client_id_;
}
+ inline const std::size_t getQueryID() const {
+ return query_id_;
+ }
+
const ClientIDMap &thread_id_map_;
StorageManager *storage_manager_;
@@ -260,6 +267,7 @@ class InsertDestination : public InsertDestinationInterface {
std::unique_ptr<const StorageBlockLayout> layout_;
const std::size_t relational_op_index_;
+ const std::size_t query_id_;
tmb::client_id scheduler_client_id_;
tmb::MessageBus *bus_;
@@ -288,10 +296,16 @@ class AlwaysCreateBlockInsertDestination : public InsertDestination {
const StorageBlockLayout *layout,
StorageManager *storage_manager,
const std::size_t relational_op_index,
+ const std::size_t query_id,
const tmb::client_id scheduler_client_id,
tmb::MessageBus *bus)
- : InsertDestination(relation, layout, storage_manager, relational_op_index, scheduler_client_id, bus) {
- }
+ : InsertDestination(relation,
+ layout,
+ storage_manager,
+ relational_op_index,
+ query_id,
+ scheduler_client_id,
+ bus) {}
~AlwaysCreateBlockInsertDestination() override {
}
@@ -334,16 +348,23 @@ class BlockPoolInsertDestination : public InsertDestination {
* @param relational_op_index The index of the relational operator in the
* QueryPlan DAG that has outputs.
* @param scheduler_client_id The TMB client ID of the scheduler thread.
+ * @param query_id The ID of the query.
* @param bus A pointer to the TMB.
**/
BlockPoolInsertDestination(const CatalogRelationSchema &relation,
const StorageBlockLayout *layout,
StorageManager *storage_manager,
const std::size_t relational_op_index,
+ const std::size_t query_id,
const tmb::client_id scheduler_client_id,
tmb::MessageBus *bus)
- : InsertDestination(relation, layout, storage_manager, relational_op_index, scheduler_client_id, bus) {
- }
+ : InsertDestination(relation,
+ layout,
+ storage_manager,
+ relational_op_index,
+ query_id,
+ scheduler_client_id,
+ bus) {}
/**
* @brief Constructor.
@@ -363,9 +384,16 @@ class BlockPoolInsertDestination : public InsertDestination {
StorageManager *storage_manager,
std::vector<block_id> &&blocks,
const std::size_t relational_op_index,
+ const std::size_t query_id,
const tmb::client_id scheduler_client_id,
tmb::MessageBus *bus)
- : InsertDestination(relation, layout, storage_manager, relational_op_index, scheduler_client_id, bus),
+ : InsertDestination(relation,
+ layout,
+ storage_manager,
+ relational_op_index,
+ query_id,
+ scheduler_client_id,
+ bus),
available_block_ids_(std::move(blocks)) {
// TODO(chasseur): Once block fill statistics are available, replace this
// with something smarter.
@@ -386,7 +414,6 @@ class BlockPoolInsertDestination : public InsertDestination {
MutableBlockReference createNewBlock() override;
private:
- FRIEND_TEST(ForemanTest, TwoNodesDAGPartiallyFilledBlocksTest);
FRIEND_TEST(QueryManagerTest, TwoNodesDAGPartiallyFilledBlocksTest);
// A vector of references to blocks which are loaded in memory.
@@ -416,17 +443,20 @@ class PartitionAwareInsertDestination : public InsertDestination {
* @param partitions The blocks in partitions.
* @param relational_op_index The index of the relational operator in the
* QueryPlan DAG that has outputs.
+ * @param query_id The ID of the query.
* @param scheduler_client_id The TMB client ID of the scheduler thread.
* @param bus A pointer to the TMB.
**/
- PartitionAwareInsertDestination(PartitionSchemeHeader *partition_scheme_header,
- const CatalogRelationSchema &relation,
- const StorageBlockLayout *layout,
- StorageManager *storage_manager,
- std::vector<std::vector<block_id>> &&partitions,
- const std::size_t relational_op_index,
- const tmb::client_id scheduler_client_id,
- tmb::MessageBus *bus);
+ PartitionAwareInsertDestination(
+ PartitionSchemeHeader *partition_scheme_header,
+ const CatalogRelationSchema &relation,
+ const StorageBlockLayout *layout,
+ StorageManager *storage_manager,
+ std::vector<std::vector<block_id>> &&partitions,
+ const std::size_t relational_op_index,
+ const std::size_t query_id,
+ const tmb::client_id scheduler_client_id,
+ tmb::MessageBus *bus);
~PartitionAwareInsertDestination() override {
delete[] mutexes_for_partition_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/storage/InsertDestination.proto
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.proto b/storage/InsertDestination.proto
index 6083539..a3d8acf 100644
--- a/storage/InsertDestination.proto
+++ b/storage/InsertDestination.proto
@@ -34,6 +34,7 @@ message InsertDestination {
optional StorageBlockLayoutDescription layout = 3;
required uint64 relational_op_index = 4;
+ required uint64 query_id = 5;
// The convention for extension numbering is that extensions for a particular
// tInsertDestinationType should begin from (insert_destination_type + 1) * 16.
[12/12] incubator-quickstep git commit: Long lived Foreman thread
Posted by hb...@apache.org.
Long lived Foreman thread
- Foreman thread lives through the lifetime of the Quickstep process.
- Foreman and main thread communicate through TMB messages.
- Foreman admits queries and routes them to PolicyEnforcer.
- Foreman relays messages to the policy enforcer, which in turn processes each
one based on the query ID of the message.
- All the tests modified accordingly.
Created PolicyEnforcer class.
- First point of entry for queries in the scheduler.
- Can perform admission control.
- Can talk to the QueryManagers of the active queries to provide them
messages to process and collect work orders for execution from them.
- Support for admitting multiple queries to the PolicyEnforcer.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/94a2e1dc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/94a2e1dc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/94a2e1dc
Branch: refs/heads/query-manager-used-in-foreman
Commit: 94a2e1dcba3ff5aecf5ccb4ca4c5d6012fc9a61c
Parents: 816f6f8
Author: Harshad Deshmukh <ha...@cs.wisc.edu>
Authored: Sat Apr 9 15:08:40 2016 -0500
Committer: Harshad Deshmukh <hb...@apache.org>
Committed: Thu Jun 9 12:34:28 2016 -0500
----------------------------------------------------------------------
CMakeLists.txt | 2 +
cli/CommandExecutor.cpp | 53 +-
cli/CommandExecutor.hpp | 11 +-
cli/QuickstepCli.cpp | 70 +--
cli/tests/CMakeLists.txt | 2 +
cli/tests/CommandExecutorTestRunner.cpp | 29 +-
cli/tests/CommandExecutorTestRunner.hpp | 37 +-
query_execution/AdmitRequestMessage.hpp | 73 +++
query_execution/CMakeLists.txt | 68 +--
query_execution/Foreman.cpp | 578 +++++--------------
query_execution/Foreman.hpp | 394 +------------
query_execution/PolicyEnforcer.cpp | 177 ++++++
query_execution/PolicyEnforcer.hpp | 168 ++++++
query_execution/QueryContext.cpp | 3 +-
query_execution/QueryContext.hpp | 9 +
query_execution/QueryContext.proto | 2 +
query_execution/QueryExecutionMessages.proto | 4 +
query_execution/QueryExecutionTypedefs.hpp | 4 +-
query_execution/QueryExecutionUtil.hpp | 52 ++
query_execution/QueryManager.hpp | 5 +-
query_execution/WorkOrdersContainer.hpp | 70 +--
query_execution/Worker.cpp | 11 +-
query_execution/Worker.hpp | 2 +
query_execution/WorkerMessage.hpp | 22 +-
query_execution/tests/QueryManager_unittest.cpp | 6 +
.../tests/WorkOrdersContainer_unittest.cpp | 26 +
query_optimizer/ExecutionGenerator.cpp | 32 +-
query_optimizer/ExecutionGenerator.hpp | 9 +
query_optimizer/tests/CMakeLists.txt | 2 +
.../tests/ExecutionGeneratorTestRunner.cpp | 23 +-
.../tests/ExecutionGeneratorTestRunner.hpp | 33 +-
query_optimizer/tests/TestDatabaseLoader.cpp | 1 +
relational_operators/AggregationOperator.hpp | 2 +
relational_operators/DeleteOperator.cpp | 1 +
relational_operators/DeleteOperator.hpp | 1 +
relational_operators/DestroyHashOperator.hpp | 4 +
relational_operators/DropTableOperator.hpp | 2 +
relational_operators/HashJoinOperator.hpp | 1 +
relational_operators/RebuildWorkOrder.hpp | 1 +
relational_operators/SortMergeRunOperator.cpp | 1 +
relational_operators/TableGeneratorOperator.hpp | 2 +
relational_operators/UpdateOperator.cpp | 1 +
relational_operators/UpdateOperator.hpp | 1 +
relational_operators/WorkOrder.hpp | 20 +-
.../tests/AggregationOperator_unittest.cpp | 7 +
.../tests/HashJoinOperator_unittest.cpp | 12 +
.../tests/SortMergeRunOperator_unittest.cpp | 6 +
.../SortRunGenerationOperator_unittest.cpp | 2 +
.../tests/TextScanOperator_unittest.cpp | 2 +
storage/InsertDestination.cpp | 49 +-
storage/InsertDestination.hpp | 58 +-
storage/InsertDestination.proto | 1 +
52 files changed, 1104 insertions(+), 1048 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2d10a78..acccacf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -717,9 +717,11 @@ target_link_libraries(quickstep_cli_shell
quickstep_cli_PrintToScreen
quickstep_parser_ParseStatement
quickstep_parser_SqlParserWrapper
+ quickstep_queryexecution_AdmitRequestMessage
quickstep_queryexecution_Foreman
quickstep_queryexecution_QueryContext
quickstep_queryexecution_QueryExecutionTypedefs
+ quickstep_queryexecution_QueryExecutionUtil
quickstep_queryexecution_Worker
quickstep_queryexecution_WorkerDirectory
quickstep_queryexecution_WorkerMessage
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/cli/CommandExecutor.cpp
----------------------------------------------------------------------
diff --git a/cli/CommandExecutor.cpp b/cli/CommandExecutor.cpp
index ddcd38f..dc14741 100644
--- a/cli/CommandExecutor.cpp
+++ b/cli/CommandExecutor.cpp
@@ -51,6 +51,8 @@
#include "glog/logging.h"
+#include "tmb/id_typedefs.h"
+
using std::fprintf;
using std::fputc;
using std::fputs;
@@ -58,6 +60,8 @@ using std::size_t;
using std::string;
using std::vector;
+namespace tmb { class MessageBus; }
+
namespace quickstep {
namespace cli {
namespace {
@@ -194,11 +198,14 @@ void executeDescribeTable(
/**
* @brief A helper function that executes a SQL query to obtain a scalar result.
*/
-inline TypedValue executeQueryForSingleResult(const std::string &query_string,
- StorageManager *storage_manager,
- QueryProcessor *query_processor,
- SqlParserWrapper *parser_wrapper,
- Foreman *foreman) {
+inline TypedValue executeQueryForSingleResult(
+ const tmb::client_id main_thread_client_id,
+ const tmb::client_id foreman_client_id,
+ const std::string &query_string,
+ tmb::MessageBus *bus,
+ StorageManager *storage_manager,
+ QueryProcessor *query_processor,
+ SqlParserWrapper *parser_wrapper) {
parser_wrapper->feedNextBuffer(new std::string(query_string));
ParseResult result = parser_wrapper->getNextStatement();
@@ -210,11 +217,8 @@ inline TypedValue executeQueryForSingleResult(const std::string &query_string,
DCHECK(query_handle->getQueryPlanMutable() != nullptr);
// Use foreman to execute the query plan.
- foreman->setQueryPlan(query_handle->getQueryPlanMutable()->getQueryPlanDAGMutable());
- foreman->reconstructQueryContextFromProto(query_handle->getQueryContextProto());
-
- foreman->start();
- foreman->join();
+ QueryExecutionUtil::ConstructAndSendAdmitRequestMessage(
+ main_thread_client_id, foreman_client_id, query_handle.get(), bus);
// Retrieve the scalar result from the result relation.
const CatalogRelation *query_result_relation = query_handle->getQueryResultRelation();
@@ -246,8 +250,10 @@ inline TypedValue executeQueryForSingleResult(const std::string &query_string,
return value;
}
-void executeAnalyze(QueryProcessor *query_processor,
- Foreman *foreman,
+void executeAnalyze(const tmb::client_id main_thread_client_id,
+ const tmb::client_id foreman_client_id,
+ MessageBus *bus,
+ QueryProcessor *query_processor,
FILE *out) {
const CatalogDatabase &database = *query_processor->getDefaultDatabase();
StorageManager *storage_manager = query_processor->getStorageManager();
@@ -273,11 +279,13 @@ void executeAnalyze(QueryProcessor *query_processor,
query_string.append(";");
TypedValue num_distinct_values =
- executeQueryForSingleResult(query_string,
+ executeQueryForSingleResult(main_thread_client_id,
+ foreman_client_id,
+ query_string,
+ bus,
storage_manager,
query_processor,
- parser_wrapper.get(),
- foreman);
+ parser_wrapper.get());
DCHECK(num_distinct_values.getTypeID() == TypeID::kLong);
mutable_relation->getStatisticsMutable()->setNumDistinctValues(
@@ -291,11 +299,13 @@ void executeAnalyze(QueryProcessor *query_processor,
query_string.append(";");
TypedValue num_tuples =
- executeQueryForSingleResult(query_string,
+ executeQueryForSingleResult(main_thread_client_id,
+ foreman_client_id,
+ query_string,
+ bus,
storage_manager,
query_processor,
- parser_wrapper.get(),
- foreman);
+ parser_wrapper.get());
DCHECK(num_tuples.getTypeID() == TypeID::kLong);
mutable_relation->getStatisticsMutable()->setNumTuples(
@@ -312,9 +322,11 @@ void executeAnalyze(QueryProcessor *query_processor,
void executeCommand(const ParseStatement &statement,
const CatalogDatabase &catalog_database,
+ const tmb::client_id main_thread_client_id,
+ const tmb::client_id foreman_client_id,
+ MessageBus *bus,
StorageManager *storage_manager,
QueryProcessor *query_processor,
- Foreman *foreman,
FILE *out) {
const ParseCommand &command = static_cast<const ParseCommand &>(statement);
const PtrVector<ParseString> *arguments = command.arguments();
@@ -328,7 +340,8 @@ void executeCommand(const ParseStatement &statement,
executeDescribeTable(arguments, catalog_database, out);
}
} else if (command_str == C::kAnalyzeCommand) {
- executeAnalyze(query_processor, foreman, out);
+ executeAnalyze(
+ main_thread_client_id, foreman_client_id, bus, query_processor, out);
} else {
THROW_SQL_ERROR_AT(command.command()) << "Invalid Command";
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/cli/CommandExecutor.hpp
----------------------------------------------------------------------
diff --git a/cli/CommandExecutor.hpp b/cli/CommandExecutor.hpp
index c819981..3435aeb 100644
--- a/cli/CommandExecutor.hpp
+++ b/cli/CommandExecutor.hpp
@@ -21,10 +21,14 @@
#include <cstdio>
#include <string>
+#include "tmb/id_typedefs.h"
+
using std::fprintf;
using std::fputc;
using std::string;
+namespace tmb { class MessageBus; }
+
namespace quickstep {
class CatalogDatabase;
@@ -53,6 +57,9 @@ constexpr char kAnalyzeCommand[] = "\\analyze";
*
* @param statement The parsed statement from the cli.
* @param catalog_database The catalog information about the current database.
+ * @param main_thread_client_id The TMB client ID of the main thread.
+ * @param foreman_client_id The TMB client ID of the Foreman thread.
+ * @param bus A pointer to the TMB.
* @param storage_manager The current StorageManager.
* @param query_processor The query processor to generate plans for SQL queries.
* @param foreman The foreman to execute query plans.
@@ -60,9 +67,11 @@ constexpr char kAnalyzeCommand[] = "\\analyze";
*/
void executeCommand(const ParseStatement &statement,
const CatalogDatabase &catalog_database,
+ const tmb::client_id main_thread_client_id,
+ const tmb::client_id foreman_client_id,
+ tmb::MessageBus *bus,
StorageManager *storage_manager,
QueryProcessor *query_processor,
- Foreman *foreman,
FILE *out);
/** @} */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index 558d6eb..3202c96 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -53,8 +53,10 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
#include "cli/PrintToScreen.hpp"
#include "parser/ParseStatement.hpp"
#include "parser/SqlParserWrapper.hpp"
+#include "query_execution/AdmitRequestMessage.hpp"
#include "query_execution/Foreman.hpp"
#include "query_execution/QueryExecutionTypedefs.hpp"
+#include "query_execution/QueryExecutionUtil.hpp"
#include "query_execution/Worker.hpp"
#include "query_execution/WorkerDirectory.hpp"
#include "query_execution/WorkerMessage.hpp"
@@ -95,6 +97,7 @@ using std::string;
using std::vector;
using quickstep::Address;
+using quickstep::AdmitRequestMessage;
using quickstep::CatalogRelation;
using quickstep::DefaultsConfigurator;
using quickstep::DropRelation;
@@ -107,6 +110,7 @@ using quickstep::ParseResult;
using quickstep::ParseStatement;
using quickstep::PrintToScreen;
using quickstep::PtrVector;
+using quickstep::QueryExecutionUtil;
using quickstep::QueryHandle;
using quickstep::QueryPlan;
using quickstep::QueryProcessor;
@@ -115,9 +119,12 @@ using quickstep::TaggedMessage;
using quickstep::Worker;
using quickstep::WorkerDirectory;
using quickstep::WorkerMessage;
+using quickstep::kAdmitRequestMessage;
using quickstep::kPoisonMessage;
+using quickstep::kWorkloadCompletionMessage;
using tmb::client_id;
+using tmb::AnnotatedMessage;
namespace quickstep {
@@ -197,7 +204,9 @@ int main(int argc, char* argv[]) {
// The TMB client id for the main thread, used to kill workers at the end.
const client_id main_thread_client_id = bus.Connect();
+ bus.RegisterClientAsSender(main_thread_client_id, kAdmitRequestMessage);
bus.RegisterClientAsSender(main_thread_client_id, kPoisonMessage);
+ bus.RegisterClientAsReceiver(main_thread_client_id, kWorkloadCompletionMessage);
// Setup the paths used by StorageManager.
string fixed_storage_path(quickstep::FLAGS_storage_path);
@@ -283,12 +292,6 @@ int main(int argc, char* argv[]) {
std::chrono::duration<double>(preload_end - preload_start).count());
}
- Foreman foreman(&bus,
- query_processor->getDefaultDatabase(),
- query_processor->getStorageManager(),
- -1, /* CPU id to bind foreman. -1 is unbound. */
- num_numa_nodes_system);
-
// Get the NUMA affinities for workers.
vector<int> cpu_numa_nodes = InputParserUtil::GetNUMANodesForCPUs();
if (cpu_numa_nodes.empty()) {
@@ -323,13 +326,20 @@ int main(int argc, char* argv[]) {
worker_client_ids,
worker_numa_nodes);
- foreman.setWorkerDirectory(&worker_directory);
+ Foreman foreman(main_thread_client_id,
+ &worker_directory,
+ &bus,
+ query_processor->getDefaultDatabase(),
+ query_processor->getStorageManager(),
+ num_numa_nodes_system);
// Start the worker threads.
for (Worker &worker : workers) {
worker.start();
}
+ foreman.start();
+
LineReaderImpl line_reader("quickstep> ",
" ...> ");
std::unique_ptr<SqlParserWrapper> parser_wrapper(new SqlParserWrapper());
@@ -366,9 +376,11 @@ int main(int argc, char* argv[]) {
quickstep::cli::executeCommand(
*result.parsed_statement,
*(query_processor->getDefaultDatabase()),
+ main_thread_client_id,
+ foreman.getBusClientID(),
+ &bus,
query_processor->getStorageManager(),
query_processor.get(),
- &foreman,
stdout);
} catch (const quickstep::SqlError &sql_error) {
fprintf(stderr, "%s",
@@ -389,14 +401,20 @@ int main(int argc, char* argv[]) {
}
DCHECK(query_handle->getQueryPlanMutable() != nullptr);
- foreman.setQueryPlan(query_handle->getQueryPlanMutable()->getQueryPlanDAGMutable());
-
- foreman.reconstructQueryContextFromProto(query_handle->getQueryContextProto());
-
+ AdmitRequestMessage request_message(query_handle.get());
+ TaggedMessage admit_tagged_message(
+ &request_message, sizeof(request_message), kAdmitRequestMessage);
+
+ start = std::chrono::steady_clock::now();
+ QueryExecutionUtil::SendTMBMessage(&bus,
+ main_thread_client_id,
+ foreman.getBusClientID(),
+ std::move(admit_tagged_message));
try {
- start = std::chrono::steady_clock::now();
- foreman.start();
- foreman.join();
+ const AnnotatedMessage annotated_msg =
+ bus.Receive(main_thread_client_id, 0, true);
+ const TaggedMessage &tagged_message = annotated_msg.tagged_message;
+ DCHECK_EQ(kWorkloadCompletionMessage, tagged_message.message_type());
end = std::chrono::steady_clock::now();
const CatalogRelation *query_result_relation = query_handle->getQueryResultRelation();
@@ -440,29 +458,13 @@ int main(int argc, char* argv[]) {
}
}
- // Terminate all workers before exiting.
- // The main thread broadcasts poison message to the workers. Each worker dies
- // after receiving poison message. The order of workers' death is irrelavant.
- MessageStyle style;
- style.Broadcast(true);
- Address address;
- address.All(true);
- std::unique_ptr<WorkerMessage> poison_message(WorkerMessage::PoisonMessage());
- TaggedMessage poison_tagged_message(poison_message.get(),
- sizeof(*poison_message),
- kPoisonMessage);
-
- const tmb::MessageBus::SendStatus send_status =
- bus.Send(main_thread_client_id,
- address,
- style,
- std::move(poison_tagged_message));
- CHECK(send_status == tmb::MessageBus::SendStatus::kOK) <<
- "Broadcast message from Foreman to workers failed";
+ // Kill the foreman and workers.
+ QueryExecutionUtil::BroadcastPoisonMessage(main_thread_client_id, &bus);
for (Worker &worker : workers) {
worker.join();
}
+ foreman.join();
return 0;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/cli/tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cli/tests/CMakeLists.txt b/cli/tests/CMakeLists.txt
index ca37e4a..d177d6c 100644
--- a/cli/tests/CMakeLists.txt
+++ b/cli/tests/CMakeLists.txt
@@ -32,9 +32,11 @@ target_link_libraries(quickstep_cli_tests_CommandExecutorTest
quickstep_cli_PrintToScreen
quickstep_parser_ParseStatement
quickstep_parser_SqlParserWrapper
+ quickstep_queryexecution_AdmitRequestMessage
quickstep_queryexecution_Foreman
quickstep_queryexecution_QueryContext
quickstep_queryexecution_QueryExecutionTypedefs
+ quickstep_queryexecution_QueryExecutionUtil
quickstep_queryexecution_Worker
quickstep_queryexecution_WorkerDirectory
quickstep_queryexecution_WorkerMessage
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/cli/tests/CommandExecutorTestRunner.cpp
----------------------------------------------------------------------
diff --git a/cli/tests/CommandExecutorTestRunner.cpp b/cli/tests/CommandExecutorTestRunner.cpp
index 9cd493e..794f7e1 100644
--- a/cli/tests/CommandExecutorTestRunner.cpp
+++ b/cli/tests/CommandExecutorTestRunner.cpp
@@ -20,12 +20,15 @@
#include <cstdio>
#include <set>
#include <string>
+#include <utility>
#include "cli/CommandExecutor.hpp"
#include "cli/DropRelation.hpp"
#include "cli/PrintToScreen.hpp"
#include "parser/ParseStatement.hpp"
+#include "query_execution/AdmitRequestMessage.hpp"
#include "query_execution/Foreman.hpp"
+#include "query_execution/QueryExecutionTypedefs.hpp"
#include "query_execution/Worker.hpp"
#include "query_optimizer/ExecutionGenerator.hpp"
#include "query_optimizer/LogicalGenerator.hpp"
@@ -41,6 +44,8 @@
#include "glog/logging.h"
+#include "tmb/tagged_message.h"
+
namespace quickstep {
class CatalogRelation;
@@ -87,9 +92,11 @@ void CommandExecutorTestRunner::runTestCase(
quickstep::cli::executeCommand(
*result.parsed_statement,
*(test_database_loader_.catalog_database()),
+ main_thread_client_id_,
+ foreman_->getBusClientID(),
+ &bus_,
test_database_loader_.storage_manager(),
nullptr,
- nullptr,
output_stream.file());
} else {
QueryHandle query_handle(optimizer_context.query_id());
@@ -100,14 +107,20 @@ void CommandExecutorTestRunner::runTestCase(
physical_generator.generatePlan(
logical_generator.generatePlan(*result.parsed_statement));
execution_generator.generatePlan(physical_plan);
- foreman_->setQueryPlan(
- query_handle.getQueryPlanMutable()->getQueryPlanDAGMutable());
-
- foreman_->reconstructQueryContextFromProto(query_handle.getQueryContextProto());
-
- foreman_->start();
- foreman_->join();
+ AdmitRequestMessage request_message(&query_handle);
+ TaggedMessage admit_tagged_message(
+ &request_message, sizeof(request_message), kAdmitRequestMessage);
+ QueryExecutionUtil::SendTMBMessage(&bus_,
+ main_thread_client_id_,
+ foreman_->getBusClientID(),
+ std::move(admit_tagged_message));
+
+ // Receive workload completion message from Foreman.
+ const AnnotatedMessage annotated_msg =
+ bus_.Receive(main_thread_client_id_, 0, true);
+ const TaggedMessage &tagged_message = annotated_msg.tagged_message;
+ DCHECK_EQ(kWorkloadCompletionMessage, tagged_message.message_type());
const CatalogRelation *query_result_relation = query_handle.getQueryResultRelation();
if (query_result_relation) {
PrintToScreen::PrintRelation(*query_result_relation,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/cli/tests/CommandExecutorTestRunner.hpp
----------------------------------------------------------------------
diff --git a/cli/tests/CommandExecutorTestRunner.hpp b/cli/tests/CommandExecutorTestRunner.hpp
index 94b1d6a..8fb5b65 100644
--- a/cli/tests/CommandExecutorTestRunner.hpp
+++ b/cli/tests/CommandExecutorTestRunner.hpp
@@ -27,6 +27,7 @@
#include "parser/SqlParserWrapper.hpp"
#include "query_execution/Foreman.hpp"
#include "query_execution/QueryExecutionTypedefs.hpp"
+#include "query_execution/QueryExecutionUtil.hpp"
#include "query_execution/Worker.hpp"
#include "query_execution/WorkerDirectory.hpp"
#include "query_execution/WorkerMessage.hpp"
@@ -34,6 +35,9 @@
#include "utility/Macros.hpp"
#include "utility/textbased_test/TextBasedTestDriver.hpp"
+#include "tmb/id_typedefs.h"
+#include "tmb/message_bus.h"
+
namespace quickstep {
/**
@@ -57,9 +61,11 @@ class CommandExecutorTestRunner : public TextBasedTestRunner {
bus_.Initialize();
- foreman_.reset(new Foreman(&bus_,
- test_database_loader_.catalog_database(),
- test_database_loader_.storage_manager()));
+ main_thread_client_id_ = bus_.Connect();
+ bus_.RegisterClientAsSender(main_thread_client_id_, kAdmitRequestMessage);
+ bus_.RegisterClientAsSender(main_thread_client_id_, kPoisonMessage);
+ bus_.RegisterClientAsReceiver(main_thread_client_id_, kWorkloadCompletionMessage);
+
worker_.reset(new Worker(0, &bus_));
std::vector<client_id> worker_client_ids;
@@ -71,27 +77,20 @@ class CommandExecutorTestRunner : public TextBasedTestRunner {
workers_.reset(new WorkerDirectory(1 /* number of workers */,
worker_client_ids, numa_nodes));
- foreman_->setWorkerDirectory(workers_.get());
+ foreman_.reset(new Foreman(main_thread_client_id_,
+ workers_.get(),
+ &bus_,
+ test_database_loader_.catalog_database(),
+ test_database_loader_.storage_manager()));
+ foreman_->start();
worker_->start();
}
~CommandExecutorTestRunner() {
- std::unique_ptr<WorkerMessage> poison_message(WorkerMessage::PoisonMessage());
- TaggedMessage poison_tagged_message(poison_message.get(),
- sizeof(*poison_message),
- quickstep::kPoisonMessage);
-
- Address worker_address;
- MessageStyle single_receiver_style;
-
- worker_address.AddRecipient(worker_->getBusClientID());
- bus_.Send(foreman_->getBusClientID(),
- worker_address,
- single_receiver_style,
- std::move(poison_tagged_message));
-
+ QueryExecutionUtil::BroadcastPoisonMessage(main_thread_client_id_, &bus_);
worker_->join();
+ foreman_->join();
}
void runTestCase(const std::string &input,
@@ -102,6 +101,8 @@ class CommandExecutorTestRunner : public TextBasedTestRunner {
SqlParserWrapper sql_parser_;
optimizer::TestDatabaseLoader test_database_loader_;
+ tmb::client_id main_thread_client_id_;
+
MessageBusImpl bus_;
std::unique_ptr<Foreman> foreman_;
std::unique_ptr<Worker> worker_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/AdmitRequestMessage.hpp
----------------------------------------------------------------------
diff --git a/query_execution/AdmitRequestMessage.hpp b/query_execution/AdmitRequestMessage.hpp
new file mode 100644
index 0000000..e33b354
--- /dev/null
+++ b/query_execution/AdmitRequestMessage.hpp
@@ -0,0 +1,73 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_EXECUTION_ADMIT_REQUEST_MESSAGE_HPP_
+#define QUICKSTEP_QUERY_EXECUTION_ADMIT_REQUEST_MESSAGE_HPP_
+
+#include <vector>
+
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+class QueryHandle;
+
+/** \addtogroup QueryExecution
+ * @{
+ */
+
+/**
+ * @brief A message requesting a query or queries to be admitted to the system.
+ **/
+class AdmitRequestMessage {
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * @param query_handles The handles of the queries requesting to be admitted
+ * to the system.
+ **/
+ explicit AdmitRequestMessage(const std::vector<QueryHandle*> &query_handles)
+ : query_handles_(query_handles) {}
+
+ /**
+ * @brief Constructor for requesting single query admission.
+ *
+ * @param query_handle The handle of the query requesting to be admitted.
+ **/
+ explicit AdmitRequestMessage(QueryHandle *query_handle) {
+ query_handles_.push_back(query_handle);
+ }
+
+ /**
+ * @brief Get the query handles from this message.
+ **/
+ const std::vector<QueryHandle*>& getQueryHandles() const {
+ return query_handles_;
+ }
+
+ private:
+ std::vector<QueryHandle*> query_handles_;
+
+ DISALLOW_COPY_AND_ASSIGN(AdmitRequestMessage);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_EXECUTION_ADMIT_REQUEST_MESSAGE_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_execution/CMakeLists.txt b/query_execution/CMakeLists.txt
index 8306f78..e75296e 100644
--- a/query_execution/CMakeLists.txt
+++ b/query_execution/CMakeLists.txt
@@ -29,8 +29,10 @@ endif()
if (ENABLE_DISTRIBUTED)
add_library(quickstep_queryexecution_BlockLocator BlockLocator.cpp BlockLocator.hpp)
endif()
+add_library(quickstep_queryexecution_AdmitRequestMessage ../empty_src.cpp AdmitRequestMessage.hpp)
add_library(quickstep_queryexecution_Foreman Foreman.cpp Foreman.hpp)
add_library(quickstep_queryexecution_ForemanLite ../empty_src.cpp ForemanLite.hpp)
+add_library(quickstep_queryexecution_PolicyEnforcer PolicyEnforcer.cpp PolicyEnforcer.hpp)
add_library(quickstep_queryexecution_QueryContext QueryContext.cpp QueryContext.hpp)
add_library(quickstep_queryexecution_QueryContext_proto
${queryexecution_QueryContext_proto_srcs}
@@ -49,6 +51,8 @@ add_library(quickstep_queryexecution_WorkerMessage ../empty_src.cpp WorkerMessag
add_library(quickstep_queryexecution_WorkerSelectionPolicy ../empty_src.cpp WorkerSelectionPolicy.hpp)
# Link dependencies:
+target_link_libraries(quickstep_queryexecution_AdmitRequestMessage
+ quickstep_utility_Macros)
if (ENABLE_DISTRIBUTED)
target_link_libraries(quickstep_queryexecution_BlockLocator
glog
@@ -63,29 +67,17 @@ if (ENABLE_DISTRIBUTED)
tmb)
endif()
target_link_libraries(quickstep_queryexecution_Foreman
+ gflags_nothreads-static
glog
- gtest
- quickstep_catalog_CatalogDatabase
- quickstep_catalog_CatalogRelation
- quickstep_catalog_CatalogTypedefs
- quickstep_catalog_PartitionScheme
+ quickstep_queryexecution_AdmitRequestMessage
quickstep_queryexecution_ForemanLite
- quickstep_queryexecution_QueryContext
- quickstep_queryexecution_QueryExecutionMessages_proto
- quickstep_queryexecution_QueryExecutionState
+ quickstep_queryexecution_PolicyEnforcer
quickstep_queryexecution_QueryExecutionTypedefs
quickstep_queryexecution_QueryExecutionUtil
- quickstep_queryexecution_WorkOrdersContainer
quickstep_queryexecution_WorkerDirectory
quickstep_queryexecution_WorkerMessage
- quickstep_relationaloperators_RebuildWorkOrder
- quickstep_relationaloperators_RelationalOperator
- quickstep_relationaloperators_WorkOrder
- quickstep_storage_InsertDestination
- quickstep_storage_StorageBlock
- quickstep_storage_StorageBlockInfo
quickstep_threading_ThreadUtil
- quickstep_utility_DAG
+ quickstep_utility_EqualsAnyConstant
quickstep_utility_Macros
tmb)
target_link_libraries(quickstep_queryexecution_ForemanLite
@@ -93,6 +85,16 @@ target_link_libraries(quickstep_queryexecution_ForemanLite
quickstep_threading_Thread
quickstep_utility_Macros
tmb)
+target_link_libraries(quickstep_queryexecution_PolicyEnforcer
+ glog
+ quickstep_queryexecution_QueryExecutionMessages_proto
+ quickstep_queryexecution_QueryExecutionTypedefs
+ quickstep_queryexecution_QueryManager
+ quickstep_queryexecution_WorkerMessage
+ quickstep_queryoptimizer_QueryHandle
+ quickstep_relationaloperators_WorkOrder
+ quickstep_utility_Macros
+ tmb)
target_link_libraries(quickstep_queryexecution_QueryContext
glog
quickstep_catalog_CatalogDatabaseLite
@@ -134,7 +136,9 @@ target_link_libraries(quickstep_queryexecution_QueryExecutionTypedefs
quickstep_threading_ThreadIDBasedMap
tmb)
target_link_libraries(quickstep_queryexecution_QueryExecutionUtil
+ quickstep_queryexecution_AdmitRequestMessage
quickstep_queryexecution_QueryExecutionTypedefs
+ quickstep_queryexecution_WorkerMessage
quickstep_utility_Macros
tmb)
target_link_libraries(quickstep_queryexecution_QueryManager
@@ -184,8 +188,10 @@ target_link_libraries(quickstep_queryexecution_WorkerSelectionPolicy
# Module all-in-one library:
add_library(quickstep_queryexecution ../empty_src.cpp QueryExecutionModule.hpp)
target_link_libraries(quickstep_queryexecution
+ quickstep_queryexecution_AdmitRequestMessage
quickstep_queryexecution_Foreman
quickstep_queryexecution_ForemanLite
+ quickstep_queryexecution_PolicyEnforcer
quickstep_queryexecution_QueryContext
quickstep_queryexecution_QueryContext_proto
quickstep_queryexecution_QueryExecutionMessages_proto
@@ -229,36 +235,6 @@ if (ENABLE_DISTRIBUTED)
add_test(BlockLocator_unittest BlockLocator_unittest)
endif()
-add_executable(Foreman_unittest
- "${CMAKE_CURRENT_SOURCE_DIR}/tests/Foreman_unittest.cpp")
-target_link_libraries(Foreman_unittest
- glog
- gtest
- gtest_main
- quickstep_catalog_CatalogDatabase
- quickstep_catalog_CatalogRelation
- quickstep_catalog_CatalogTypedefs
- quickstep_queryexecution_Foreman
- quickstep_queryexecution_QueryContext
- quickstep_queryexecution_QueryContext_proto
- quickstep_queryexecution_QueryExecutionState
- quickstep_queryexecution_QueryExecutionTypedefs
- quickstep_queryexecution_WorkOrdersContainer
- quickstep_queryexecution_WorkerDirectory
- quickstep_queryexecution_WorkerMessage
- quickstep_queryoptimizer_QueryPlan
- quickstep_relationaloperators_RelationalOperator
- quickstep_relationaloperators_WorkOrder
- quickstep_storage_InsertDestination
- quickstep_storage_InsertDestination_proto
- quickstep_storage_StorageBlock
- quickstep_storage_StorageBlockInfo
- quickstep_storage_StorageManager
- quickstep_utility_DAG
- quickstep_utility_Macros
- tmb)
-add_test(Foreman_unittest Foreman_unittest)
-
add_executable(QueryManager_unittest
"${CMAKE_CURRENT_SOURCE_DIR}/tests/QueryManager_unittest.cpp")
target_link_libraries(QueryManager_unittest
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/Foreman.cpp
----------------------------------------------------------------------
diff --git a/query_execution/Foreman.cpp b/query_execution/Foreman.cpp
index 7705819..3609120 100644
--- a/query_execution/Foreman.cpp
+++ b/query_execution/Foreman.cpp
@@ -22,355 +22,189 @@
#include <utility>
#include <vector>
-#include "catalog/CatalogDatabase.hpp"
-#include "catalog/CatalogRelation.hpp"
-#include "catalog/CatalogTypedefs.hpp"
-#include "catalog/PartitionScheme.hpp"
-#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryExecutionMessages.pb.h"
+#include "query_execution/AdmitRequestMessage.hpp"
#include "query_execution/QueryExecutionTypedefs.hpp"
#include "query_execution/QueryExecutionUtil.hpp"
#include "query_execution/WorkerDirectory.hpp"
#include "query_execution/WorkerMessage.hpp"
-#include "relational_operators/RebuildWorkOrder.hpp"
-#include "relational_operators/RelationalOperator.hpp"
-#include "relational_operators/WorkOrder.hpp"
-#include "storage/InsertDestination.hpp"
-#include "storage/StorageBlock.hpp"
-#include "storage/StorageBlockInfo.hpp"
#include "threading/ThreadUtil.hpp"
+#include "utility/EqualsAnyConstant.hpp"
#include "utility/Macros.hpp"
+#include "gflags/gflags.h"
#include "glog/logging.h"
#include "tmb/message_bus.h"
#include "tmb/tagged_message.h"
using std::move;
-using std::pair;
using std::size_t;
+using std::unique_ptr;
using std::vector;
namespace quickstep {
-void Foreman::initialize() {
+DEFINE_uint64(min_load_per_worker, 2, "The minimum load defined as the number "
+ "of pending work orders for the worker. This information is used "
+ "by the Foreman to assign work orders to worker threads");
+
+Foreman::Foreman(const tmb::client_id main_thread_client_id,
+ WorkerDirectory *worker_directory,
+ tmb::MessageBus *bus,
+ CatalogDatabaseLite *catalog_database,
+ StorageManager *storage_manager,
+ const int cpu_id,
+ const size_t num_numa_nodes)
+ : ForemanLite(bus, cpu_id),
+ main_thread_client_id_(main_thread_client_id),
+ worker_directory_(DCHECK_NOTNULL(worker_directory)),
+ catalog_database_(DCHECK_NOTNULL(catalog_database)),
+ storage_manager_(DCHECK_NOTNULL(storage_manager)) {
+ const std::vector<QueryExecutionMessageType> sender_message_types{
+ kPoisonMessage,
+ kRebuildWorkOrderMessage,
+ kWorkOrderMessage,
+ kWorkloadCompletionMessage};
+
+ for (const auto message_type : sender_message_types) {
+ bus_->RegisterClientAsSender(foreman_client_id_, message_type);
+ }
+
+ const std::vector<QueryExecutionMessageType> receiver_message_types{
+ kAdmitRequestMessage,
+ kCatalogRelationNewBlockMessage,
+ kDataPipelineMessage,
+ kPoisonMessage,
+ kRebuildWorkOrderCompleteMessage,
+ kWorkOrderFeedbackMessage,
+ kWorkOrdersAvailableMessage,
+ kWorkOrderCompleteMessage};
+
+ for (const auto message_type : receiver_message_types) {
+ bus_->RegisterClientAsReceiver(foreman_client_id_, message_type);
+ }
+
+ policy_enforcer_.reset(new PolicyEnforcer(
+ foreman_client_id_,
+ num_numa_nodes,
+ catalog_database_,
+ storage_manager_,
+ bus_));
+}
+
+void Foreman::run() {
if (cpu_id_ >= 0) {
// We can pin the foreman thread to a CPU if specified.
ThreadUtil::BindToCPU(cpu_id_);
}
- initializeState();
-
- DEBUG_ASSERT(query_dag_ != nullptr);
- const dag_node_index dag_size = query_dag_->size();
-
- // Collect all the workorders from all the relational operators in the DAG.
- for (dag_node_index index = 0; index < dag_size; ++index) {
- if (checkAllBlockingDependenciesMet(index)) {
- query_dag_->getNodePayloadMutable(index)->informAllBlockingDependenciesMet();
- processOperator(index, false);
- }
- }
-
- // Dispatch the WorkOrders generated so far.
- dispatchWorkerMessages(0, 0);
-}
-
-void Foreman::processWorkOrderCompleteMessage(const dag_node_index op_index,
- const size_t worker_thread_index) {
- query_exec_state_->decrementNumQueuedWorkOrders(op_index);
-
- // As the given worker finished executing a WorkOrder, decrement its number
- // of queued WorkOrders.
- workers_->decrementNumQueuedWorkOrders(worker_thread_index);
-
- // Check if new work orders are available and fetch them if so.
- fetchNormalWorkOrders(op_index);
-
- if (checkRebuildRequired(op_index)) {
- if (checkNormalExecutionOver(op_index)) {
- if (!checkRebuildInitiated(op_index)) {
- if (initiateRebuild(op_index)) {
- // Rebuild initiated and completed right away.
- markOperatorFinished(op_index);
- } else {
- // Rebuild under progress.
- }
- } else if (checkRebuildOver(op_index)) {
- // Rebuild was under progress and now it is over.
- markOperatorFinished(op_index);
- }
- } else {
- // Normal execution under progress for this operator.
- }
- } else if (checkOperatorExecutionOver(op_index)) {
- // Rebuild not required for this operator and its normal execution is
- // complete.
- markOperatorFinished(op_index);
- }
-
- for (const pair<dag_node_index, bool> &dependent_link :
- query_dag_->getDependents(op_index)) {
- const dag_node_index dependent_op_index = dependent_link.first;
- if (checkAllBlockingDependenciesMet(dependent_op_index)) {
- // Process the dependent operator (of the operator whose WorkOrder
- // was just executed) for which all the dependencies have been met.
- processOperator(dependent_op_index, true);
- }
- }
-
- // Dispatch the WorkerMessages to the workers. We prefer to start the search
- // for the schedulable WorkOrders beginning from 'op_index'. The first
- // candidate worker to receive the next WorkOrder is the one that sent the
- // response message to Foreman.
- dispatchWorkerMessages(worker_thread_index, op_index);
-}
-
-void Foreman::processRebuildWorkOrderCompleteMessage(const dag_node_index op_index,
- const size_t worker_thread_index) {
- query_exec_state_->decrementNumRebuildWorkOrders(op_index);
- workers_->decrementNumQueuedWorkOrders(worker_thread_index);
-
- if (checkRebuildOver(op_index)) {
- markOperatorFinished(op_index);
-
- for (const pair<dag_node_index, bool> &dependent_link :
- query_dag_->getDependents(op_index)) {
- const dag_node_index dependent_op_index = dependent_link.first;
- if (checkAllBlockingDependenciesMet(dependent_op_index)) {
- processOperator(dependent_op_index, true);
- }
- }
- }
-
- // Dispatch the WorkerMessages to the workers. We prefer to start the search
- // for the schedulable WorkOrders beginning from 'op_index'. The first
- // candidate worker to receive the next WorkOrder is the one that sent the
- // response message to Foreman.
- dispatchWorkerMessages(worker_thread_index, op_index);
-}
-
-void Foreman::processDataPipelineMessage(const dag_node_index op_index,
- const block_id block,
- const relation_id rel_id) {
- for (const dag_node_index consumer_index :
- output_consumers_[op_index]) {
- // Feed the streamed block to the consumer. Note that 'output_consumers_'
- // only contain those dependents of operator with index = op_index which are
- // eligible to receive streamed input.
- query_dag_->getNodePayloadMutable(consumer_index)->feedInputBlock(block, rel_id);
- // Because of the streamed input just fed, check if there are any new
- // WorkOrders available and if so, fetch them.
- fetchNormalWorkOrders(consumer_index);
- }
-
- // Dispatch the WorkerMessages to the workers. We prefer to start the search
- // for the schedulable WorkOrders beginning from 'op_index'. The first
- // candidate worker to receive the next WorkOrder is the one that sent the
- // response message to Foreman.
- // TODO(zuyu): Improve the data locality for the next WorkOrder.
- dispatchWorkerMessages(0, op_index);
-}
-
-void Foreman::processFeedbackMessage(const WorkOrder::FeedbackMessage &msg) {
- RelationalOperator *op =
- query_dag_->getNodePayloadMutable(msg.header().rel_op_index);
- op->receiveFeedbackMessage(msg);
-}
-
-void Foreman::run() {
- // Initialize before for Foreman eventloop.
- initialize();
// Event loop
- while (!query_exec_state_->hasQueryExecutionFinished()) {
+ for (;;) {
// Receive() causes this thread to sleep until next message is received.
- AnnotatedMessage annotated_msg = bus_->Receive(foreman_client_id_, 0, true);
+ const AnnotatedMessage annotated_msg =
+ bus_->Receive(foreman_client_id_, 0, true);
const TaggedMessage &tagged_message = annotated_msg.tagged_message;
- switch (tagged_message.message_type()) {
- case kWorkOrderCompleteMessage: {
- serialization::WorkOrderCompletionMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- processWorkOrderCompleteMessage(proto.operator_index(), proto.worker_thread_index());
- break;
- }
- case kRebuildWorkOrderCompleteMessage: {
- serialization::WorkOrderCompletionMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- processRebuildWorkOrderCompleteMessage(proto.operator_index(), proto.worker_thread_index());
+ const tmb::message_type_id message_type = tagged_message.message_type();
+ switch (message_type) {
+ case kCatalogRelationNewBlockMessage: // Fall through
+ case kDataPipelineMessage:
+ case kRebuildWorkOrderCompleteMessage:
+ case kWorkOrderCompleteMessage:
+ case kWorkOrderFeedbackMessage:
+ case kWorkOrdersAvailableMessage: {
+ policy_enforcer_->processMessage(tagged_message);
break;
}
- case kCatalogRelationNewBlockMessage: {
- serialization::CatalogRelationNewBlockMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- const block_id block = proto.block_id();
-
- CatalogRelation *relation =
- static_cast<CatalogDatabase*>(catalog_database_)->getRelationByIdMutable(proto.relation_id());
- relation->addBlock(block);
-
- if (proto.has_partition_id()) {
- relation->getPartitionSchemeMutable()->addBlockToPartition(proto.partition_id(), block);
+ case kAdmitRequestMessage: {
+ const AdmitRequestMessage *msg =
+ static_cast<const AdmitRequestMessage *>(tagged_message.message());
+ const vector<QueryHandle *> &query_handles = msg->getQueryHandles();
+
+ DCHECK(!query_handles.empty());
+ bool all_queries_admitted = true;
+ if (query_handles.size() == 1u) {
+ all_queries_admitted =
+ policy_enforcer_->admitQuery(query_handles.front());
+ } else {
+ all_queries_admitted = policy_enforcer_->admitQueries(query_handles);
+ }
+ if (!all_queries_admitted) {
+ LOG(WARNING) << "The scheduler could not admit all the queries";
+ // TODO(harshad) - Inform the main thread about the failure.
}
break;
}
- case kDataPipelineMessage: {
- // Possible message senders include InsertDestinations and some
- // operators which modify existing blocks.
- serialization::DataPipelineMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- processDataPipelineMessage(proto.operator_index(), proto.block_id(), proto.relation_id());
- break;
- }
- case kWorkOrdersAvailableMessage: {
- serialization::WorkOrdersAvailableMessage proto;
- CHECK(proto.ParseFromArray(tagged_message.message(), tagged_message.message_bytes()));
-
- const dag_node_index op_index = proto.operator_index();
-
- // Check if new work orders are available.
- fetchNormalWorkOrders(op_index);
-
- // Dispatch the WorkerMessages to the workers. We prefer to start the search
- // for the schedulable WorkOrders beginning from 'op_index'. The first
- // candidate worker to receive the next WorkOrder is the one that sent the
- // response message to Foreman.
- // TODO(zuyu): Improve the data locality for the next WorkOrder.
- dispatchWorkerMessages(0, op_index);
- break;
- }
- case kWorkOrderFeedbackMessage: {
- WorkOrder::FeedbackMessage msg(const_cast<void *>(tagged_message.message()),
- tagged_message.message_bytes());
- processFeedbackMessage(msg);
- break;
+ case kPoisonMessage: {
+ if (policy_enforcer_->hasQueries()) {
+ LOG(WARNING) << "Foreman thread exiting while some queries are "
+ "under execution or waiting to be admitted";
+ }
+ return;
}
default:
LOG(FATAL) << "Unknown message type to Foreman";
}
- }
-
- // Clean up before exiting.
- cleanUp();
-}
-void Foreman::dispatchWorkerMessages(
- const size_t start_worker_index,
- const dag_node_index start_operator_index) {
- // Loop over all workers. Stopping criteria:
- // 1. Every worker has been assigned exactly max_msgs_per_worker_ workorders.
- // OR 2. No schedulable workorders at this time.
- size_t done_workers_count = 0;
- for (size_t curr_worker = start_worker_index;
- done_workers_count < workers_->getNumWorkers();
- curr_worker = (curr_worker + 1) % workers_->getNumWorkers()) {
- if (workers_->getNumQueuedWorkOrders(curr_worker) < max_msgs_per_worker_) {
- std::unique_ptr<WorkerMessage> msg;
- msg.reset(getNextWorkerMessage(
- start_operator_index, workers_->getNUMANode(curr_worker)));
- if (msg.get() != nullptr) {
- sendWorkerMessage(curr_worker, *msg);
- workers_->incrementNumQueuedWorkOrders(curr_worker);
- } else {
- // No schedulable workorder at this point.
- ++done_workers_count;
- }
- } else {
- // curr_worker already has been assigned max_msgs_per_worker workorders.
- ++done_workers_count;
+ if (canCollectNewMessages(message_type)) {
+ vector<unique_ptr<WorkerMessage>> new_messages;
+ policy_enforcer_->getWorkerMessages(&new_messages);
+ dispatchWorkerMessages(new_messages);
+ }
+
+ // We check again, as some queries may produce zero work orders and finish
+ // their execution.
+ if (!policy_enforcer_->hasQueries()) {
+ // Signal the main thread that there are no queries to be executed.
+ // Currently the message doesn't have any real content.
+ const int dummy_payload = 0;
+ TaggedMessage completion_tagged_message(
+ &dummy_payload, sizeof(dummy_payload), kWorkloadCompletionMessage);
+ const tmb::MessageBus::SendStatus send_status =
+ QueryExecutionUtil::SendTMBMessage(
+ bus_,
+ foreman_client_id_,
+ main_thread_client_id_,
+ move(completion_tagged_message));
+ CHECK(send_status == tmb::MessageBus::SendStatus::kOK)
+ << "Message could not be sent from Foreman with TMB client ID "
+ << foreman_client_id_ << " to main thread with TMB client ID"
+ << main_thread_client_id_;
}
}
}
-void Foreman::initializeState() {
- const dag_node_index dag_size = query_dag_->size();
-
- output_consumers_.resize(dag_size);
- blocking_dependencies_.resize(dag_size);
-
- query_exec_state_.reset(new QueryExecutionState(dag_size));
- workorders_container_.reset(new WorkOrdersContainer(dag_size, num_numa_nodes_));
-
- for (dag_node_index node_index = 0; node_index < dag_size; ++node_index) {
- const QueryContext::insert_destination_id insert_destination_index =
- query_dag_->getNodePayload(node_index).getInsertDestinationID();
- if (insert_destination_index != QueryContext::kInvalidInsertDestinationId) {
- // Rebuild is necessary whenever InsertDestination is present.
- query_exec_state_->setRebuildRequired(node_index);
- query_exec_state_->setRebuildStatus(node_index, 0, false);
- }
-
- for (const pair<dag_node_index, bool> &dependent_link :
- query_dag_->getDependents(node_index)) {
- const dag_node_index dependent_op_index = dependent_link.first;
- if (!query_dag_->getLinkMetadata(node_index, dependent_op_index)) {
- // The link is not a pipeline-breaker. Streaming of blocks is possible
- // between these two operators.
- output_consumers_[node_index].push_back(dependent_op_index);
- } else {
- // The link is a pipeline-breaker. Streaming of blocks is not possible
- // between these two operators.
- blocking_dependencies_[dependent_op_index].push_back(node_index);
- }
- }
+bool Foreman::canCollectNewMessages(const tmb::message_type_id message_type) {
+ if (QUICKSTEP_EQUALS_ANY_CONSTANT(message_type,
+ kCatalogRelationNewBlockMessage,
+ kWorkOrderFeedbackMessage)) {
+ return false;
+ } else if (worker_directory_->getLeastLoadedWorker().second <=
+ FLAGS_min_load_per_worker) {
+ // If the least loaded worker has at most FLAGS_min_load_per_worker pending
+ // work orders, we should collect new messages and dispatch them.
+ return true;
+ } else {
+ return false;
}
}
-// TODO(harshad) : The default policy may execute remote WorkOrders for an
-// operator with a lower index even when there are local WorkOrders available for
-// an operator with higher index. We should examine if avoiding this behavior
-// has any benefits with respect to execution time and/or memory pressure.
-WorkerMessage* Foreman::getNextWorkerMessage(
- const dag_node_index start_operator_index, const int numa_node) {
- // Default policy: Operator with lowest index first.
- WorkOrder *work_order = nullptr;
- size_t num_operators_checked = 0;
- for (dag_node_index index = start_operator_index;
- num_operators_checked < query_dag_->size();
- index = (index + 1) % query_dag_->size(), ++num_operators_checked) {
- if (query_exec_state_->hasExecutionFinished(index)) {
- continue;
- }
- if (numa_node != -1) {
- // First try to get a normal WorkOrder from the specified NUMA node.
- work_order = workorders_container_->getNormalWorkOrderForNUMANode(index, numa_node);
- if (work_order != nullptr) {
- // A WorkOrder found on the given NUMA node.
- query_exec_state_->incrementNumQueuedWorkOrders(index);
- return WorkerMessage::WorkOrderMessage(work_order, index);
- } else {
- // Normal workorder not found on this node. Look for a rebuild workorder
- // on this NUMA node.
- work_order = workorders_container_->getRebuildWorkOrderForNUMANode(index, numa_node);
- if (work_order != nullptr) {
- return WorkerMessage::RebuildWorkOrderMessage(work_order, index);
- }
- }
- }
- // Either no workorder found on the given NUMA node, or numa_node is -1.
- // Try to get a normal WorkOrder from other NUMA nodes.
- work_order = workorders_container_->getNormalWorkOrder(index);
- if (work_order != nullptr) {
- query_exec_state_->incrementNumQueuedWorkOrders(index);
- return WorkerMessage::WorkOrderMessage(work_order, index);
+void Foreman::dispatchWorkerMessages(const vector<unique_ptr<WorkerMessage>> &messages) {
+ for (auto const &message : messages) {
+ DCHECK(message != nullptr);
+ int recipient_worker_thread_index = message->getRecipientHint();
+ if (recipient_worker_thread_index != -1) {
+ sendWorkerMessage(static_cast<size_t>(recipient_worker_thread_index),
+ *message);
} else {
- // Normal WorkOrder not found, look for a RebuildWorkOrder.
- work_order = workorders_container_->getRebuildWorkOrder(index);
- if (work_order != nullptr) {
- return WorkerMessage::RebuildWorkOrderMessage(work_order, index);
- }
+ sendWorkerMessage(worker_directory_->getLeastLoadedWorker().first,
+ *message);
}
}
- // No WorkOrders available right now.
- return nullptr;
}
-void Foreman::sendWorkerMessage(const std::size_t worker_thread_index,
+void Foreman::sendWorkerMessage(const size_t worker_thread_index,
const WorkerMessage &message) {
- message_type_id type;
+ tmb::message_type_id type;
if (message.getType() == WorkerMessage::WorkerMessageType::kRebuildWorkOrder) {
type = kRebuildWorkOrderMessage;
} else if (message.getType() == WorkerMessage::WorkerMessageType::kWorkOrder) {
@@ -383,152 +217,12 @@ void Foreman::sendWorkerMessage(const std::size_t worker_thread_index,
const tmb::MessageBus::SendStatus send_status =
QueryExecutionUtil::SendTMBMessage(bus_,
foreman_client_id_,
- workers_->getClientID(worker_thread_index),
+ worker_directory_->getClientID(worker_thread_index),
move(worker_tagged_message));
CHECK(send_status == tmb::MessageBus::SendStatus::kOK) <<
"Message could not be sent from Foreman with TMB client ID "
<< foreman_client_id_ << " to Foreman with TMB client ID "
- << workers_->getClientID(worker_thread_index);
-}
-
-bool Foreman::fetchNormalWorkOrders(const dag_node_index index) {
- bool generated_new_workorders = false;
- if (!query_exec_state_->hasDoneGenerationWorkOrders(index)) {
- // Do not fetch any work units until all blocking dependencies are met.
- // The releational operator is not aware of blocking dependencies for
- // uncorrelated scalar queries.
- if (!checkAllBlockingDependenciesMet(index)) {
- return false;
- }
- const size_t num_pending_workorders_before =
- workorders_container_->getNumNormalWorkOrders(index);
- const bool done_generation =
- query_dag_->getNodePayloadMutable(index)->getAllWorkOrders(workorders_container_.get(),
- query_context_.get(),
- storage_manager_,
- foreman_client_id_,
- bus_);
- if (done_generation) {
- query_exec_state_->setDoneGenerationWorkOrders(index);
- }
-
- // TODO(shoban): It would be a good check to see if operator is making
- // useful progress, i.e., the operator either generates work orders to
- // execute or still has pending work orders executing. However, this will not
- // work if Foreman polls operators without feeding data. This check can be
- // enabled, if Foreman is refactored to call getAllWorkOrders() only when
- // pending work orders are completed or new input blocks feed.
-
- generated_new_workorders =
- (num_pending_workorders_before <
- workorders_container_->getNumNormalWorkOrders(index));
- }
- return generated_new_workorders;
-}
-
-void Foreman::processOperator(const dag_node_index index,
- const bool recursively_check_dependents) {
- if (fetchNormalWorkOrders(index)) {
- // Fetched work orders. Return to wait for the generated work orders to
- // execute, and skip the execution-finished checks.
- return;
- }
-
- if (checkNormalExecutionOver(index)) {
- if (checkRebuildRequired(index)) {
- if (!checkRebuildInitiated(index)) {
- // Rebuild hasn't started, initiate it.
- if (initiateRebuild(index)) {
- // Rebuild initiated and completed right away.
- markOperatorFinished(index);
- } else {
- // Rebuild WorkOrders have been generated.
- return;
- }
- } else if (checkRebuildOver(index)) {
- // Rebuild had been initiated and it is over.
- markOperatorFinished(index);
- }
- } else {
- // Rebuild is not required and normal execution over, mark finished.
- markOperatorFinished(index);
- }
- // If we reach here, that means the operator has been marked as finished.
- if (recursively_check_dependents) {
- for (const pair<dag_node_index, bool> &dependent_link :
- query_dag_->getDependents(index)) {
- const dag_node_index dependent_op_index = dependent_link.first;
- if (checkAllBlockingDependenciesMet(dependent_op_index)) {
- processOperator(dependent_op_index, true);
- }
- }
- }
- }
-}
-
-void Foreman::markOperatorFinished(const dag_node_index index) {
- query_exec_state_->setExecutionFinished(index);
-
- RelationalOperator *op = query_dag_->getNodePayloadMutable(index);
- op->updateCatalogOnCompletion();
-
- const relation_id output_rel = op->getOutputRelationID();
- for (const pair<dag_node_index, bool> &dependent_link : query_dag_->getDependents(index)) {
- const dag_node_index dependent_op_index = dependent_link.first;
- RelationalOperator *dependent_op = query_dag_->getNodePayloadMutable(dependent_op_index);
- // Signal dependent operator that current operator is done feeding input blocks.
- if (output_rel >= 0) {
- dependent_op->doneFeedingInputBlocks(output_rel);
- }
- if (checkAllBlockingDependenciesMet(dependent_op_index)) {
- dependent_op->informAllBlockingDependenciesMet();
- }
- }
-}
-
-bool Foreman::initiateRebuild(const dag_node_index index) {
- DEBUG_ASSERT(!workorders_container_->hasRebuildWorkOrder(index));
- DEBUG_ASSERT(checkRebuildRequired(index));
- DEBUG_ASSERT(!checkRebuildInitiated(index));
-
- getRebuildWorkOrders(index, workorders_container_.get());
-
- query_exec_state_->setRebuildStatus(
- index, workorders_container_->getNumRebuildWorkOrders(index), true);
-
- return (query_exec_state_->getNumRebuildWorkOrders(index) == 0);
-}
-
-void Foreman::getRebuildWorkOrders(const dag_node_index index, WorkOrdersContainer *container) {
- const RelationalOperator &op = query_dag_->getNodePayload(index);
- const QueryContext::insert_destination_id insert_destination_index = op.getInsertDestinationID();
-
- if (insert_destination_index == QueryContext::kInvalidInsertDestinationId) {
- return;
- }
-
- vector<MutableBlockReference> partially_filled_block_refs;
-
- DCHECK(query_context_ != nullptr);
- InsertDestination *insert_destination = query_context_->getInsertDestination(insert_destination_index);
- DCHECK(insert_destination != nullptr);
-
- insert_destination->getPartiallyFilledBlocks(&partially_filled_block_refs);
-
- for (vector<MutableBlockReference>::size_type i = 0;
- i < partially_filled_block_refs.size();
- ++i) {
- // Note: The query ID used below is dummy for now, it will be replaced with
- // the true query ID when QueryManager gets used in Foreman.
- container->addRebuildWorkOrder(
- new RebuildWorkOrder(0,
- move(partially_filled_block_refs[i]),
- index,
- op.getOutputRelationID(),
- foreman_client_id_,
- bus_),
- index);
- }
+ << worker_directory_->getClientID(worker_thread_index);
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/Foreman.hpp
----------------------------------------------------------------------
diff --git a/query_execution/Foreman.hpp b/query_execution/Foreman.hpp
index 2d6e0d3..5c4893d 100644
--- a/query_execution/Foreman.hpp
+++ b/query_execution/Foreman.hpp
@@ -22,46 +22,40 @@
#include <memory>
#include <vector>
-#include "catalog/CatalogTypedefs.hpp"
#include "query_execution/ForemanLite.hpp"
-#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryExecutionState.hpp"
-#include "query_execution/QueryExecutionTypedefs.hpp"
-#include "query_execution/WorkOrdersContainer.hpp"
-#include "query_execution/WorkerMessage.hpp"
-#include "relational_operators/RelationalOperator.hpp"
-#include "relational_operators/WorkOrder.hpp"
-#include "storage/StorageBlockInfo.hpp"
-#include "utility/DAG.hpp"
+#include "query_execution/PolicyEnforcer.hpp"
#include "utility/Macros.hpp"
-#include "glog/logging.h"
-#include "gtest/gtest_prod.h"
+#include "gflags/gflags.h"
+#include "tmb/id_typedefs.h"
#include "tmb/message_bus.h"
namespace quickstep {
+DECLARE_uint64(min_load_per_worker);
+
class CatalogDatabaseLite;
class StorageManager;
class WorkerDirectory;
-
-namespace serialization { class QueryContext; }
+class WorkerMessage;
/** \addtogroup QueryExecution
* @{
*/
/**
- * @brief The Foreman scans the query DAG, requests each operator to produce
- * workorders. It also pipelines the intermediate output it receives to
- * the relational operators which need it.
+ * @brief The Foreman receives queries from the main thread, messages from the
+ * policy enforcer and dispatches the work to worker threads. It also
+ * receives work completion messages from workers.
**/
class Foreman final : public ForemanLite {
public:
/**
* @brief Constructor.
*
+ * @param main_thread_client_id The TMB client ID of the main thread.
+ * @param worker_directory The worker directory.
* @param bus A pointer to the TMB.
* @param catalog_database The catalog database where this query is executed.
* @param storage_manager The StorageManager to use.
@@ -71,233 +65,31 @@ class Foreman final : public ForemanLite {
* @note If cpu_id is not specified, Foreman thread can be possibly moved
* around on different CPUs by the OS.
**/
- Foreman(tmb::MessageBus *bus,
+ Foreman(const tmb::client_id main_thread_client_id,
+ WorkerDirectory *worker_directory,
+ tmb::MessageBus *bus,
CatalogDatabaseLite *catalog_database,
StorageManager *storage_manager,
const int cpu_id = -1,
- const int num_numa_nodes = 1)
- : ForemanLite(bus, cpu_id),
- catalog_database_(DCHECK_NOTNULL(catalog_database)),
- storage_manager_(DCHECK_NOTNULL(storage_manager)),
- max_msgs_per_worker_(1),
- num_numa_nodes_(num_numa_nodes) {
- bus_->RegisterClientAsSender(foreman_client_id_, kWorkOrderMessage);
- bus_->RegisterClientAsSender(foreman_client_id_, kRebuildWorkOrderMessage);
- // NOTE : Foreman thread sends poison messages in the optimizer's
- // ExecutionGeneratorTest.
- bus_->RegisterClientAsSender(foreman_client_id_, kPoisonMessage);
-
- bus_->RegisterClientAsReceiver(foreman_client_id_,
- kWorkOrderCompleteMessage);
- bus_->RegisterClientAsReceiver(foreman_client_id_,
- kRebuildWorkOrderCompleteMessage);
- bus_->RegisterClientAsReceiver(foreman_client_id_, kCatalogRelationNewBlockMessage);
- bus_->RegisterClientAsReceiver(foreman_client_id_, kDataPipelineMessage);
- bus_->RegisterClientAsReceiver(foreman_client_id_,
- kWorkOrdersAvailableMessage);
- bus_->RegisterClientAsReceiver(foreman_client_id_,
- kWorkOrderFeedbackMessage);
- }
+ const std::size_t num_numa_nodes = 1);
~Foreman() override {}
- /**
- * @brief Set the Query plan DAG for the query to be executed.
- *
- * @param query_plan_dag A pointer to the query plan DAG.
- **/
- inline void setQueryPlan(DAG<RelationalOperator, bool> *query_plan_dag) {
- query_dag_ = query_plan_dag;
- }
-
- /**
- * @brief Reconstruct the QueryContext for the query to be executed.
- *
- * @param proto The serialized QueryContext.
- **/
- inline void reconstructQueryContextFromProto(const serialization::QueryContext &proto) {
- query_context_.reset(
- new QueryContext(proto, *catalog_database_, storage_manager_, foreman_client_id_, bus_));
- }
-
- /**
- * @brief Set the WorkerDirectory pointer.
- *
- * @param workers A pointer to the WorkerDirectory.
- **/
- void setWorkerDirectory(WorkerDirectory *workers) {
- workers_ = workers;
- }
-
- /**
- * @brief Set the maximum number of messages that should be allocated to each
- * worker during a single round of WorkOrder dispatch.
- *
- * @param max_msgs_per_worker Maximum number of messages.
- **/
- void setMaxMessagesPerWorker(const std::size_t max_msgs_per_worker) {
- max_msgs_per_worker_ = max_msgs_per_worker;
- }
-
protected:
/**
- * @brief The foreman receives a DAG of relational operators, asks relational
- * operators to produce the workorders and based on the response it gets
- * pipelines the intermediate output to dependent relational operators.
- *
- * @note The workers who get the messages from the Foreman execute and
- * subsequently delete the WorkOrder contained in the message.
+ * @brief Run the event-based loop in the Foreman thread.
**/
void run() override;
private:
- typedef DAG<RelationalOperator, bool>::size_type_nodes dag_node_index;
-
- /**
- * @brief Check if all the dependencies of the node at specified index have
- * finished their execution.
- *
- * @note This function's true return value is a pre-requisite for calling
- * getRebuildWorkOrders()
- *
- * @param node_index The index of the specified node in the query DAG.
- *
- * @return True if all the dependencies have finished their execution. False
- * otherwise.
- **/
- inline bool checkAllDependenciesMet(const dag_node_index node_index) const {
- for (const dag_node_index dependency_index : query_dag_->getDependencies(node_index)) {
- // If at least one of the dependencies is not met, return false.
- if (!query_exec_state_->hasExecutionFinished(dependency_index)) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * @brief Check if all the blocking dependencies of the node at specified
- * index have finished their execution.
- *
- * @note A blocking dependency is the one which is pipeline breaker. Output of
- * a dependency can't be streamed to its dependent if the link between
- * them is pipeline breaker.
- *
- * @param node_index The index of the specified node in the query DAG.
- *
- * @return True if all the blocking dependencies have finished their
- * execution. False otherwise.
- **/
- inline bool checkAllBlockingDependenciesMet(const dag_node_index node_index) const {
- for (const dag_node_index blocking_dependency_index : blocking_dependencies_[node_index]) {
- if (!query_exec_state_->hasExecutionFinished(blocking_dependency_index)) {
- return false;
- }
- }
- return true;
- }
-
/**
* @brief Dispatch schedulable WorkOrders, wrapped in WorkerMessages to the
* worker threads.
*
- * @param start_worker_index The dispatch of WorkOrders preferably begins with
- * the worker at this index.
- * @param start_operator_index The search for a schedulable WorkOrder
- * begins with the WorkOrders generated by this operator.
- **/
- void dispatchWorkerMessages(const std::size_t start_worker_index,
- const dag_node_index start_operator_index);
-
- /**
- * @brief Initialize all the local vectors and maps. If the operator has an
- * InsertDestination, pass the bus address and Foreman's TMB client ID
- * to it.
- **/
- void initializeState();
-
- /**
- * @brief Initialize the Foreman before starting the event loop. This binds
- * the Foreman thread to configured CPU, and does initial processing of
- * operator before waiting for events from Workers.
- **/
- void initialize();
-
- /**
- * @brief Process the received WorkOrder complete message.
- *
- * @param node_index The index of the specified operator node in the query DAG
- * for the completed WorkOrder.
- * @param worker_thread_index The logical index of the worker thread in
- * WorkerDirectory for the completed WorkOrder.
- **/
- void processWorkOrderCompleteMessage(const dag_node_index op_index,
- const std::size_t worker_thread_index);
-
- /**
- * @brief Process the received RebuildWorkOrder complete message.
- *
- * @param node_index The index of the specified operator node in the query DAG
- * for the completed RebuildWorkOrder.
- * @param worker_thread_index The logical index of the worker thread in
- * WorkerDirectory for the completed RebuildWorkOrder.
- **/
- void processRebuildWorkOrderCompleteMessage(const dag_node_index op_index,
- const std::size_t worker_thread_index);
-
- /**
- * @brief Process the received data pipeline message.
- *
- * @param node_index The index of the specified operator node in the query DAG
- * for the pipelining block.
- * @param block The block id.
- * @param rel_id The ID of the relation that produced 'block'.
- **/
- void processDataPipelineMessage(const dag_node_index op_index,
- const block_id block,
- const relation_id rel_id);
-
- /**
- * @brief Process the received work order feedback message and notify relational
- * operator.
- *
- * @param message Feedback message from work order.
- **/
- void processFeedbackMessage(const WorkOrder::FeedbackMessage &message);
-
- /**
- * @brief Clear some of the vectors used for a single run of a query.
- **/
- void cleanUp() {
- output_consumers_.clear();
- blocking_dependencies_.clear();
- }
-
- /**
- * @brief Process a current relational operator: Get its workorders and store
- * them in the WorkOrdersContainer for this query. If the operator can
- * be marked as done, do so.
- *
- * @param index The index of the relational operator to be processed in the
- * query plan DAG.
- * @param recursively_check_dependents If an operator is done, should we
- * call processOperator on its dependents recursively.
+ * @param messages The messages to be dispatched.
**/
- void processOperator(const dag_node_index index, const bool recursively_check_dependents);
-
- /**
- * @brief Get the next workorder to be excuted, wrapped in a WorkerMessage.
- *
- * @param start_operator_index Begin the search for the schedulable WorkOrder
- * with the operator at this index.
- * @param numa_node The next WorkOrder should preferably have its input(s)
- * from this numa_node. This is a hint and not a binding requirement.
- *
- * @return A pointer to the WorkerMessage. If there's no WorkOrder to be
- * executed, return NULL.
- **/
- WorkerMessage* getNextWorkerMessage(
- const dag_node_index start_operator_index, const int numa_node = -1);
+ void dispatchWorkerMessages(
+ const std::vector<std::unique_ptr<WorkerMessage>> &messages);
/**
* @brief Send the given message to the specified worker.
@@ -306,156 +98,24 @@ class Foreman final : public ForemanLite {
* in WorkerDirectory.
* @param message The WorkerMessage to be sent.
**/
- void sendWorkerMessage(const std::size_t worker_thread_index, const WorkerMessage &message);
+ void sendWorkerMessage(const std::size_t worker_thread_index,
+ const WorkerMessage &message);
/**
- * @brief Fetch all work orders currently available in relational operator and
- * store them internally.
- *
- * @param index The index of the relational operator to be processed in the
- * query plan DAG.
+ * @brief Check if we can collect new messages from the PolicyEnforcer.
*
- * @return Whether any work order was generated by op.
+ * @param message_type The type of the last received message.
**/
- bool fetchNormalWorkOrders(const dag_node_index index);
+ bool canCollectNewMessages(const tmb::message_type_id message_type);
- /**
- * @brief This function does the following things:
- * 1. Mark the given relational operator as "done".
- * 2. For all the dependents of this operator, check if all of their
- * blocking dependencies are met. If so inform them that the blocking
- * dependencies are met.
- * 3. Check if the given operator is done producing output. If it's
- * done, inform the dependents that they won't receive input anymore
- * from the given operator.
- *
- * @param index The index of the given relational operator in the DAG.
- **/
- void markOperatorFinished(const dag_node_index index);
+ const tmb::client_id main_thread_client_id_;
- /**
- * @brief Check if the execution of the given operator is over.
- *
- * @param index The index of the given operator in the DAG.
- *
- * @return True if the execution of the given operator is over, false
- * otherwise.
- **/
- inline bool checkOperatorExecutionOver(const dag_node_index index) const {
- if (checkRebuildRequired(index)) {
- return (checkNormalExecutionOver(index) && checkRebuildOver(index));
- } else {
- return checkNormalExecutionOver(index);
- }
- }
-
- /**
- * @brief Check if the given operator's normal execution is over.
- *
- * @note The conditions for a given operator's normal execution to get over:
- * 1. All of its normal (i.e. non rebuild) WorkOrders have finished
- * execution.
- * 2. The operator is done generating work orders.
- * 3. All of the dependencies of the given operator have been met.
- *
- * @param index The index of the given operator in the DAG.
- *
- * @return True if the normal execution of the given operator is over, false
- * otherwise.
- **/
- inline bool checkNormalExecutionOver(const dag_node_index index) const {
- return (checkAllDependenciesMet(index) &&
- !workorders_container_->hasNormalWorkOrder(index) &&
- query_exec_state_->getNumQueuedWorkOrders(index) == 0 &&
- query_exec_state_->hasDoneGenerationWorkOrders(index));
- }
-
- /**
- * @brief Check if the rebuild operation is required for a given operator.
- *
- * @param index The index of the given operator in the DAG.
- *
- * @return True if the rebuild operation is required, false otherwise.
- **/
- inline bool checkRebuildRequired(const dag_node_index index) const {
- return query_exec_state_->isRebuildRequired(index);
- }
-
- /**
- * @brief Check if the rebuild operation for a given operator is over.
- *
- * @param index The index of the given operator in the DAG.
- *
- * @return True if the rebuild operation is over, false otherwise.
- **/
- inline bool checkRebuildOver(const dag_node_index index) const {
- return query_exec_state_->hasRebuildInitiated(index) &&
- !workorders_container_->hasRebuildWorkOrder(index) &&
- (query_exec_state_->getNumRebuildWorkOrders(index) == 0);
- }
-
- /**
- * @brief Check if the rebuild operation for a given operator has been
- * initiated.
- *
- * @param index The index of the given operator in the DAG.
- *
- * @return True if the rebuild operation has been initiated, false otherwise.
- **/
- inline bool checkRebuildInitiated(const dag_node_index index) const {
- return query_exec_state_->hasRebuildInitiated(index);
- }
-
- /**
- * @brief Initiate the rebuild process for partially filled blocks generated
- * during the execution of the given operator.
- *
- * @param index The index of the given operator in the DAG.
- *
- * @return True if the rebuild is over immediately, i.e. the operator didn't
- * generate any rebuild WorkOrders, false otherwise.
- **/
- bool initiateRebuild(const dag_node_index index);
-
- /**
- * @brief Get the rebuild WorkOrders for an operator.
- *
- * @note This function should be called only once, when all the normal
- * WorkOrders generated by an operator finish their execution.
- *
- * @param index The index of the operator in the query plan DAG.
- * @param container A pointer to a WorkOrdersContainer to be used to store the
- * generated WorkOrders.
- **/
- void getRebuildWorkOrders(const dag_node_index index, WorkOrdersContainer *container);
+ WorkerDirectory *worker_directory_;
CatalogDatabaseLite *catalog_database_;
StorageManager *storage_manager_;
- DAG<RelationalOperator, bool> *query_dag_;
-
- std::unique_ptr<QueryContext> query_context_;
-
- // During a single round of WorkOrder dispatch, a Worker should be allocated
- // at most these many WorkOrders.
- std::size_t max_msgs_per_worker_;
-
- // For all nodes, store their receiving dependents.
- std::vector<std::vector<dag_node_index>> output_consumers_;
-
- // For all nodes, store their pipeline breaking dependencies (if any).
- std::vector<std::vector<dag_node_index>> blocking_dependencies_;
-
- std::unique_ptr<QueryExecutionState> query_exec_state_;
-
- std::unique_ptr<WorkOrdersContainer> workorders_container_;
-
- const int num_numa_nodes_;
-
- WorkerDirectory *workers_;
-
- friend class ForemanTest;
- FRIEND_TEST(ForemanTest, TwoNodesDAGPartiallyFilledBlocksTest);
+ std::unique_ptr<PolicyEnforcer> policy_enforcer_;
DISALLOW_COPY_AND_ASSIGN(Foreman);
};
[05/12] incubator-quickstep git commit: Improve text scan operator
Posted by hb...@apache.org.
Improve text scan operator
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/55b06fab
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/55b06fab
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/55b06fab
Branch: refs/heads/query-manager-used-in-foreman
Commit: 55b06fab1bd336f2cc7ee4bd557d3328a428e4ab
Parents: 2d39b8e
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Thu Jun 9 03:18:37 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Thu Jun 9 03:38:28 2016 -0500
----------------------------------------------------------------------
query_optimizer/ExecutionGenerator.cpp | 1 -
relational_operators/CMakeLists.txt | 23 +-
relational_operators/TextScanOperator.cpp | 818 ++++++-------------
relational_operators/TextScanOperator.hpp | 286 +++----
relational_operators/WorkOrder.proto | 15 +-
relational_operators/WorkOrderFactory.cpp | 72 +-
.../tests/TextScanOperator_unittest.cpp | 1 -
relational_operators/tests/text_scan_input.txt | 8 +-
8 files changed, 384 insertions(+), 840 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index 99c2a21..f9fd742 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -945,7 +945,6 @@ void ExecutionGenerator::convertCopyFrom(
physical_plan->file_name(),
physical_plan->column_delimiter(),
physical_plan->escape_strings(),
- FLAGS_parallelize_load,
*output_relation,
insert_destination_index));
insert_destination_proto->set_relational_op_index(scan_operator_index);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index eec5300..315997f 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -1,5 +1,7 @@
# Copyright 2011-2015 Quickstep Technologies LLC.
# Copyright 2015-2016 Pivotal Software, Inc.
+# Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+# University of Wisconsin—Madison.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,9 +18,6 @@
QS_PROTOBUF_GENERATE_CPP(relationaloperators_SortMergeRunOperator_proto_srcs
relationaloperators_SortMergeRunOperator_proto_hdrs
SortMergeRunOperator.proto)
-QS_PROTOBUF_GENERATE_CPP(relationaloperators_TextScanOperator_proto_srcs
- relationaloperators_TextScanOperator_proto_hdrs
- TextScanOperator.proto)
QS_PROTOBUF_GENERATE_CPP(relationaloperators_WorkOrder_proto_srcs
relationaloperators_WorkOrder_proto_hdrs
WorkOrder.proto)
@@ -55,9 +54,6 @@ add_library(quickstep_relationaloperators_SortRunGenerationOperator SortRunGener
SortRunGenerationOperator.hpp)
add_library(quickstep_relationaloperators_TableGeneratorOperator TableGeneratorOperator.cpp TableGeneratorOperator.hpp)
add_library(quickstep_relationaloperators_TextScanOperator TextScanOperator.cpp TextScanOperator.hpp)
-add_library(quickstep_relationaloperators_TextScanOperator_proto
- ${relationaloperators_TextScanOperator_proto_srcs}
- ${relationaloperators_TextScanOperator_proto_hdrs})
add_library(quickstep_relationaloperators_UpdateOperator UpdateOperator.cpp UpdateOperator.hpp)
add_library(quickstep_relationaloperators_WorkOrder ../empty_src.cpp WorkOrder.hpp)
add_library(quickstep_relationaloperators_WorkOrderFactory WorkOrderFactory.cpp WorkOrderFactory.hpp)
@@ -354,27 +350,19 @@ target_link_libraries(quickstep_relationaloperators_TextScanOperator
glog
quickstep_catalog_CatalogAttribute
quickstep_catalog_CatalogRelation
- quickstep_catalog_CatalogRelationSchema
quickstep_catalog_CatalogTypedefs
quickstep_queryexecution_QueryContext
- quickstep_queryexecution_QueryExecutionMessages_proto
- quickstep_queryexecution_QueryExecutionTypedefs
- quickstep_queryexecution_QueryExecutionUtil
quickstep_queryexecution_WorkOrdersContainer
quickstep_relationaloperators_RelationalOperator
- quickstep_relationaloperators_TextScanOperator_proto
quickstep_relationaloperators_WorkOrder
quickstep_storage_InsertDestination
- quickstep_storage_StorageBlob
- quickstep_storage_StorageBlockInfo
- quickstep_storage_StorageManager
- quickstep_threading_ThreadIDBasedMap
quickstep_types_Type
quickstep_types_TypedValue
+ quickstep_types_containers_ColumnVector
+ quickstep_types_containers_ColumnVectorsValueAccessor
quickstep_types_containers_Tuple
quickstep_utility_Glob
quickstep_utility_Macros
- quickstep_utility_ThreadSafeQueue
tmb)
target_link_libraries(quickstep_relationaloperators_UpdateOperator
glog
@@ -424,7 +412,6 @@ target_link_libraries(quickstep_relationaloperators_WorkOrderFactory
quickstep_relationaloperators_SortRunGenerationOperator
quickstep_relationaloperators_TableGeneratorOperator
quickstep_relationaloperators_TextScanOperator
- quickstep_relationaloperators_TextScanOperator_proto
quickstep_relationaloperators_UpdateOperator
quickstep_relationaloperators_WorkOrder_proto
quickstep_storage_StorageBlockInfo
@@ -432,7 +419,6 @@ target_link_libraries(quickstep_relationaloperators_WorkOrderFactory
tmb)
target_link_libraries(quickstep_relationaloperators_WorkOrder_proto
quickstep_relationaloperators_SortMergeRunOperator_proto
- quickstep_relationaloperators_TextScanOperator_proto
${PROTOBUF_LIBRARY})
# Module all-in-one library:
@@ -460,7 +446,6 @@ target_link_libraries(quickstep_relationaloperators
quickstep_relationaloperators_SortRunGenerationOperator
quickstep_relationaloperators_TableGeneratorOperator
quickstep_relationaloperators_TextScanOperator
- quickstep_relationaloperators_TextScanOperator_proto
quickstep_relationaloperators_UpdateOperator
quickstep_relationaloperators_WorkOrder
quickstep_relationaloperators_WorkOrderFactory
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/relational_operators/TextScanOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.cpp b/relational_operators/TextScanOperator.cpp
index 5acecbf..d2fd0cd 100644
--- a/relational_operators/TextScanOperator.cpp
+++ b/relational_operators/TextScanOperator.cpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,124 +22,30 @@
#include <algorithm>
#include <cctype>
#include <cstddef>
-#include <cstdint>
#include <cstdio>
#include <cstdlib>
-#include <cstring>
+#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "catalog/CatalogAttribute.hpp"
-#include "catalog/CatalogRelationSchema.hpp"
#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryExecutionMessages.pb.h"
-#include "query_execution/QueryExecutionUtil.hpp"
#include "query_execution/WorkOrdersContainer.hpp"
-#include "relational_operators/TextScanOperator.pb.h"
#include "storage/InsertDestination.hpp"
-#include "storage/StorageBlob.hpp"
-#include "storage/StorageBlockInfo.hpp"
-#include "storage/StorageManager.hpp"
-#include "threading/ThreadIDBasedMap.hpp"
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "types/containers/Tuple.hpp"
+#include "types/containers/ColumnVector.hpp"
+#include "types/containers/ColumnVectorsValueAccessor.hpp"
#include "utility/Glob.hpp"
-#include "gflags/gflags.h"
#include "glog/logging.h"
#include "tmb/id_typedefs.h"
-#include "tmb/message_bus.h"
-#include "tmb/tagged_message.h"
-
-using std::isxdigit;
-using std::size_t;
-using std::sscanf;
-using std::string;
namespace quickstep {
-DEFINE_uint64(textscan_split_blob_size, 2,
- "Size of blobs in number of slots the input text files "
- "are split into in the TextScanOperator.");
-
-// Check if blob size is positive.
-static bool ValidateTextScanSplitBlobSize(const char *flagname,
- std::uint64_t blob_size) {
- if (blob_size == 0) {
- LOG(ERROR) << "--" << flagname << " must be greater than 0";
- return false;
- }
-
- return true;
-}
-
-static const volatile bool text_scan_split_blob_size_dummy = gflags::RegisterFlagValidator(
- &FLAGS_textscan_split_blob_size, &ValidateTextScanSplitBlobSize);
-
-namespace {
-
-// Detect whether '*search_string' contains a row-terminator (either line-feed
-// or carriage-return + line-feed) immediately before 'end_pos'. If
-// 'process_escape_sequences' is true, this function will also eliminate
-// false-positives from an escaped row-terminator. Returns the number of
-// characters in the row-terminator, or 0 if no terminator is detected.
-inline unsigned DetectRowTerminator(const char *search_string,
- std::size_t end_pos,
- const bool process_escape_sequences) {
- if (end_pos == 0) {
- // Empty string.
- return 0;
- }
-
- if (search_string[end_pos - 1] != '\n') {
- // String doesn't end in newline.
- return 0;
- }
-
- if (end_pos == 1) {
- // String is the single newline character.
- return 1;
- }
-
- const bool have_carriage_return = (search_string[end_pos - 2] == '\r');
- if (have_carriage_return && (end_pos == 2)) {
- // String is CR-LF and nothing else.
- return 2;
- }
-
- std::size_t backslashes = 0;
- // Count consecutive backslashes preceding the terminator. If there is an odd
- // number of backslashes, then the terminator is escaped and doesn't count as
- // a real terminator. If there is an even number of backslashes, then each
- // pair is an escaped backslash literal and the terminator still counts.
- if (process_escape_sequences) {
- end_pos = end_pos - 2 - have_carriage_return;
- while (end_pos != 0) {
- if (search_string[end_pos] == '\\') {
- ++backslashes;
- --end_pos;
- if ((end_pos == 0) && (search_string[0] == '\\')) {
- // Don't forget to count a backslash at the very beginning of a string.
- ++backslashes;
- }
- } else {
- break;
- }
- }
- }
-
- if (backslashes & 0x1) {
- return 0;
- } else {
- return 1 + have_carriage_return;
- }
-}
-
-} // namespace
-
bool TextScanOperator::getAllWorkOrders(
WorkOrdersContainer *container,
QueryContext *query_context,
@@ -155,116 +63,50 @@ bool TextScanOperator::getAllWorkOrders(
InsertDestination *output_destination =
query_context->getInsertDestination(output_destination_index_);
- if (parallelize_load_) {
- // Parallel implementation: Split work orders are generated for each file
- // being bulk-loaded. (More than one file can be loaded, because we support
- // glob() semantics in file name.) These work orders read the input file,
- // and split them in the blobs that can be parsed independently.
- if (blocking_dependencies_met_) {
- if (!work_generated_) {
- // First, generate text-split work orders.
- for (const auto &file : files) {
- container->addNormalWorkOrder(
- new TextSplitWorkOrder(query_id_,
- file,
- process_escape_sequences_,
- storage_manager,
- op_index_,
- scheduler_client_id,
- bus),
- op_index_);
- ++num_split_work_orders_;
- }
- work_generated_ = true;
- return false;
- } else {
- // Check if there are blobs to parse.
- while (!text_blob_queue_.empty()) {
- const TextBlob blob_work = text_blob_queue_.popOne();
- container->addNormalWorkOrder(
- new TextScanWorkOrder(query_id_,
- blob_work.blob_id,
- blob_work.size,
- field_terminator_,
- process_escape_sequences_,
- output_destination,
- storage_manager),
- op_index_);
- }
- // Done if all split work orders are completed, and no blobs are left to
- // process.
- return num_done_split_work_orders_.load(std::memory_order_acquire) == num_split_work_orders_ &&
- text_blob_queue_.empty();
- }
- }
- return false;
- } else {
- // Serial implementation.
- if (blocking_dependencies_met_ && !work_generated_) {
- for (const auto &file : files) {
+ // Text segment size set to 256KB.
+ constexpr std::size_t kTextSegmentSize = 0x40000u;
+
+ if (blocking_dependencies_met_ && !work_generated_) {
+ for (const std::string &file : files) {
+ // Use standard C libary to retrieve the file size.
+ FILE *fp = std::fopen(file.c_str(), "rb");
+ std::fseek(fp, 0, SEEK_END);
+ const std::size_t file_size = std::ftell(fp);
+ std::fclose(fp);
+
+ std::size_t text_offset = 0;
+ while (text_offset < file_size) {
container->addNormalWorkOrder(
new TextScanWorkOrder(query_id_,
file,
+ text_offset,
+ std::min(kTextSegmentSize, file_size - text_offset),
field_terminator_,
process_escape_sequences_,
output_destination,
storage_manager),
op_index_);
+ text_offset += kTextSegmentSize;
}
- work_generated_ = true;
}
- return work_generated_;
- }
-}
-
-void TextScanOperator::receiveFeedbackMessage(const WorkOrder::FeedbackMessage &msg) {
- switch (msg.type()) {
- case kSplitWorkOrderCompletionMessage: {
- num_done_split_work_orders_.fetch_add(1, std::memory_order_release);
- break;
- }
- case kNewTextBlobMessage: {
- serialization::TextBlob proto;
- CHECK(proto.ParseFromArray(msg.payload(), msg.payload_size()));
- text_blob_queue_.push(TextBlob(proto.blob_id(), proto.size()));
- break;
- }
- default:
- LOG(ERROR) << "Unknown feedback message type for TextScanOperator";
+ work_generated_ = true;
}
+ return work_generated_;
}
TextScanWorkOrder::TextScanWorkOrder(const std::size_t query_id,
const std::string &filename,
+ const std::size_t text_offset,
+ const std::size_t text_segment_size,
const char field_terminator,
const bool process_escape_sequences,
InsertDestination *output_destination,
StorageManager *storage_manager)
: WorkOrder(query_id),
- is_file_(true),
filename_(filename),
+ text_offset_(text_offset),
+ text_segment_size_(text_segment_size),
field_terminator_(field_terminator),
- text_blob_(0),
- text_size_(0),
- process_escape_sequences_(process_escape_sequences),
- output_destination_(output_destination),
- storage_manager_(storage_manager) {
- DCHECK(output_destination_ != nullptr);
- DCHECK(storage_manager_ != nullptr);
-}
-
-TextScanWorkOrder::TextScanWorkOrder(const std::size_t query_id,
- const block_id text_blob,
- const std::size_t text_size,
- const char field_terminator,
- const bool process_escape_sequences,
- InsertDestination *output_destination,
- StorageManager *storage_manager)
- : WorkOrder(query_id),
- is_file_(false),
- field_terminator_(field_terminator),
- text_blob_(text_blob),
- text_size_(text_size),
process_escape_sequences_(process_escape_sequences),
output_destination_(output_destination),
storage_manager_(storage_manager) {
@@ -274,439 +116,293 @@ TextScanWorkOrder::TextScanWorkOrder(const std::size_t query_id,
void TextScanWorkOrder::execute() {
const CatalogRelationSchema &relation = output_destination_->getRelation();
+ std::vector<Tuple> tuples;
- string current_row_string;
- if (is_file_) {
- FILE *file = std::fopen(filename_.c_str(), "r");
- if (file == nullptr) {
- throw TextScanReadError(filename_);
- }
+ constexpr std::size_t kSmallBufferSize = 0x4000;
+ char *buffer = reinterpret_cast<char *>(malloc(std::max(text_segment_size_, kSmallBufferSize)));
- bool have_row = false;
- do {
- current_row_string.clear();
- have_row = readRowFromFile(file, &current_row_string);
- if (have_row) {
- Tuple tuple = parseRow(current_row_string, relation);
- output_destination_->insertTupleInBatch(tuple);
- }
- } while (have_row);
-
- std::fclose(file);
- } else {
- BlobReference blob = storage_manager_->getBlob(text_blob_);
- const char *blob_pos = static_cast<const char*>(blob->getMemory());
- const char *blob_end = blob_pos + text_size_;
- bool have_row = false;
- do {
- current_row_string.clear();
- have_row = readRowFromBlob(&blob_pos, blob_end, &current_row_string);
- if (have_row) {
- Tuple tuple = parseRow(current_row_string, relation);
- output_destination_->insertTupleInBatch(tuple);
- }
- } while (have_row);
-
- // Drop the consumed blob produced by TextSplitWorkOrder.
- blob.release();
- storage_manager_->deleteBlockOrBlobFile(text_blob_);
+ // Read text segment into buffer.
+ FILE *file = std::fopen(filename_.c_str(), "rb");
+ std::fseek(file, text_offset_, SEEK_SET);
+ std::size_t bytes_read = std::fread(buffer, 1, text_segment_size_, file);
+ if (bytes_read != text_segment_size_) {
+ throw TextScanReadError(filename_);
}
-}
-char TextScanWorkOrder::ParseOctalLiteral(const std::string &row_string,
- std::size_t *start_pos) {
- const std::size_t stop_pos = std::min(row_string.length(), *start_pos + 3);
-
- int value = 0;
- for (; *start_pos < stop_pos; ++*start_pos) {
- int char_value = row_string[*start_pos] - '0';
- if ((char_value >= 0) && (char_value < 8)) {
- value = value * 8 + char_value;
- } else {
- return value;
+ // Locate the first newline character.
+ const char *buffer_end = buffer + text_segment_size_;
+ const char *row_ptr = buffer;
+ if (text_offset_ != 0) {
+ while (row_ptr < buffer_end && *row_ptr != '\n') {
+ ++row_ptr;
}
+ } else {
+ --row_ptr;
}
- return value;
-}
-
-char TextScanWorkOrder::ParseHexLiteral(const std::string &row_string,
- std::size_t *start_pos) {
- const std::size_t stop_pos = std::min(row_string.length(), *start_pos + 2);
+ if (row_ptr >= buffer_end) {
+ // This block does not even contain a newline character.
+ return;
+ }
- int value = 0;
- for (; *start_pos < stop_pos; ++*start_pos) {
- if (!std::isxdigit(row_string[*start_pos])) {
- break;
- }
+ // Locate the last newline character.
+ const char *end_ptr = buffer_end - 1;
+ while (end_ptr > row_ptr && *end_ptr != '\n') {
+ --end_ptr;
+ }
- int char_value;
- if (std::isdigit(row_string[*start_pos])) {
- char_value = row_string[*start_pos] - '0';
- } else if (std::islower(row_string[*start_pos])) {
- char_value = row_string[*start_pos] - 'a' + 10;
+ // Advance both row_ptr and end_ptr by 1.
+ ++row_ptr;
+ ++end_ptr;
+ // Now row_ptr is pointing to the first character RIGHT AFTER the FIRST newline
+ // character in this text segment, and end_ptr is pointing to the first character
+ // RIGHT AFTER the LAST newline character in this text segment.
+
+ // Process the tuples which are between the first newline character and the
+ // last newline character.
+ while (row_ptr < end_ptr) {
+ if (*row_ptr == '\r' || *row_ptr == '\n') {
+ // Skip empty lines.
+ ++row_ptr;
} else {
- char_value = row_string[*start_pos] - 'A' + 10;
+ tuples.emplace_back(parseRow(&row_ptr, relation));
}
-
- value = value * 16 + char_value;
}
- return value;
-}
+ // Process the tuple that is right after the last newline character.
+ // NOTE(jianqiao): dynamic_read_size is trying to balance between the cases
+ // that the last tuple is very small / very large.
+ std::size_t dynamic_read_size = 1024;
+ std::string row_string;
+ std::fseek(file, text_offset_ + (end_ptr - buffer), SEEK_SET);
+ bool has_reached_end = false;
+ do {
+ bytes_read = std::fread(buffer, 1, dynamic_read_size, file);
+ std::size_t bytes_to_copy = bytes_read;
-bool TextScanWorkOrder::readRowFromFile(FILE *file, std::string *row_string) const {
- // Read up to 1023 chars + null-terminator at a time.
- static constexpr std::size_t kRowBufferSize = 1024;
- char row_buffer[kRowBufferSize];
- for (;;) {
- char *read_string = std::fgets(row_buffer, sizeof(row_buffer), file);
- if (read_string == nullptr) {
- if (std::feof(file)) {
- if (row_string->empty()) {
- return false;
- } else {
- throw TextScanFormatError("File ended without delimiter");
- }
- } else {
- throw TextScanReadError(filename_);
+ for (std::size_t i = 0; i < bytes_read; ++i) {
+ if (buffer[i] == '\n') {
+ bytes_to_copy = i + 1;
+ has_reached_end = true;
+ break;
}
}
-
- // Append the contents of the buffer to '*row_string', and see if we've
- // reached a genuine row-terminator yet.
- row_string->append(row_buffer);
- if (removeRowTerminator(row_string)) {
- row_string->push_back(field_terminator_);
- return true;
+ if (!has_reached_end && bytes_read != dynamic_read_size) {
+ has_reached_end = true;
}
- }
-}
-bool TextScanWorkOrder::readRowFromBlob(const char **start_pos,
- const char *end_pos,
- std::string *row_string) const {
- while (*start_pos != end_pos) {
- const char *next_newline = static_cast<const char*>(std::memchr(
- *start_pos,
- '\n',
- end_pos - *start_pos));
-
- if (next_newline == nullptr) {
- throw TextScanFormatError("File ended without delimiter");
- }
+ row_string.append(buffer, bytes_to_copy);
+ dynamic_read_size = std::min(dynamic_read_size * 2, kSmallBufferSize);
+ } while (!has_reached_end);
- // Append the blob's contents through the next newline to '*row_string',
- // and see if we've reached a genuine row-terminator yet.
- row_string->append(*start_pos, next_newline - *start_pos + 1);
- *start_pos = next_newline + 1;
- if (removeRowTerminator(row_string)) {
- row_string->push_back(field_terminator_);
- return true;
+ if (!row_string.empty()) {
+ if (row_string.back() != '\n') {
+ row_string.push_back('\n');
}
+ row_ptr = row_string.c_str();
+ tuples.emplace_back(parseRow(&row_ptr, relation));
}
- if (row_string->empty()) {
- return false;
- } else {
- throw TextScanFormatError("File ended without delimiter");
- }
-}
-
-bool TextScanWorkOrder::removeRowTerminator(std::string *row_string) const {
- unsigned row_term_chars = DetectRowTerminator(row_string->c_str(),
- row_string->length(),
- process_escape_sequences_);
- if (row_term_chars == 0) {
- return false;
- } else {
- row_string->resize(row_string->length() - row_term_chars);
- return true;
- }
-}
-
-bool TextScanWorkOrder::extractFieldString(const std::string &row_string,
- std::size_t *start_pos,
- std::string *field_string) const {
- // Check for NULL literal string.
- if (process_escape_sequences_
- && (row_string.length() - *start_pos >= 3)
- && (row_string[*start_pos] == '\\')
- && (row_string[*start_pos + 1] == 'N')
- && (row_string[*start_pos + 2] == field_terminator_)) {
- *start_pos += 3;
- return false;
- }
-
- // Scan up until terminator, expanding backslashed escape sequences as we go.
- std::size_t terminator_pos = row_string.find(field_terminator_, *start_pos);
- std::size_t scan_pos = *start_pos;
-
- if (process_escape_sequences_) {
- for (;;) {
- std::size_t backslash_pos = row_string.find('\\', scan_pos);
- if ((backslash_pos == std::string::npos) || (backslash_pos >= terminator_pos)) {
- // No more backslashes, or the next backslash is beyond the field
- // terminator.
- break;
- }
-
- // Copy up to the backslash.
- field_string->append(row_string, scan_pos, backslash_pos - scan_pos);
-
- if (backslash_pos + 1 == terminator_pos) {
- // The terminator we found was escaped by a backslash, so append the
- // literal terminator and re-scan for the next terminator character.
- field_string->push_back(field_terminator_);
- scan_pos = terminator_pos + 1;
- terminator_pos = row_string.find(field_terminator_, scan_pos);
- continue;
+ std::fclose(file);
+ free(buffer);
+
+ // Store the tuples in a ColumnVectorsValueAccessor for bulk insert.
+ ColumnVectorsValueAccessor column_vectors;
+ std::size_t attr_id = 0;
+ for (const auto &attribute : relation) {
+ const Type &attr_type = attribute.getType();
+ if (attr_type.isVariableLength()) {
+ std::unique_ptr<IndirectColumnVector> column(
+ new IndirectColumnVector(attr_type, tuples.size()));
+ for (const auto &tuple : tuples) {
+ column->appendTypedValue(tuple.getAttributeValue(attr_id));
}
-
- // Expand escape sequence.
- switch (row_string[backslash_pos + 1]) {
- case '0': // Fallthrough for octal digits.
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- // Octal char literal.
- scan_pos = backslash_pos + 1;
- field_string->push_back(ParseOctalLiteral(row_string, &scan_pos));
- break;
- case 'N': {
- // Null literal after some other column data.
- throw TextScanFormatError(
- "Null indicator '\\N' encountered in text scan mixed in with "
- "other column data.");
- }
- case '\\':
- // Backslash.
- field_string->push_back('\\');
- scan_pos = backslash_pos + 2;
- break;
- case 'b':
- // Backspace.
- field_string->push_back('\b');
- scan_pos = backslash_pos + 2;
- break;
- case 'f':
- // Form-feed.
- field_string->push_back('\f');
- scan_pos = backslash_pos + 2;
- break;
- case 'n':
- // Newline.
- field_string->push_back('\n');
- scan_pos = backslash_pos + 2;
- break;
- case 'r':
- // Carriage return.
- field_string->push_back('\r');
- scan_pos = backslash_pos + 2;
- break;
- case 't':
- // Tab.
- field_string->push_back('\t');
- scan_pos = backslash_pos + 2;
- break;
- case 'v':
- // Vertical tab.
- field_string->push_back('\v');
- scan_pos = backslash_pos + 2;
- break;
- case 'x':
- if ((backslash_pos + 2 < row_string.length()) && std::isxdigit(row_string[backslash_pos + 2])) {
- // Hexidecimal char literal.
- scan_pos = backslash_pos + 2;
- field_string->push_back(ParseHexLiteral(row_string, &scan_pos));
- } else {
- // Just an escaped 'x' with no hex digits.
- field_string->push_back('x');
- scan_pos = backslash_pos + 2;
- }
- break;
- default:
- // Append escaped character as-is.
- field_string->push_back(row_string[backslash_pos + 1]);
- scan_pos = backslash_pos + 2;
- break;
+ column_vectors.addColumn(column.release());
+ } else {
+ std::unique_ptr<NativeColumnVector> column(
+ new NativeColumnVector(attr_type, tuples.size()));
+ for (const auto &tuple : tuples) {
+ column->appendTypedValue(tuple.getAttributeValue(attr_id));
}
+ column_vectors.addColumn(column.release());
}
+ ++attr_id;
}
- DCHECK_NE(terminator_pos, std::string::npos);
- field_string->append(row_string, scan_pos, terminator_pos - scan_pos);
- *start_pos = terminator_pos + 1;
- return true;
+ // Bulk insert the tuples.
+ output_destination_->bulkInsertTuples(&column_vectors);
}
-Tuple TextScanWorkOrder::parseRow(const std::string &row_string, const CatalogRelationSchema &relation) const {
+Tuple TextScanWorkOrder::parseRow(const char **row_ptr,
+ const CatalogRelationSchema &relation) const {
std::vector<TypedValue> attribute_values;
- std::size_t pos = 0;
+ bool is_null_literal;
+ bool has_reached_end_of_line = false;
std::string value_str;
- CatalogRelationSchema::const_iterator attr_it = relation.begin();
- while (pos < row_string.length()) {
- if (attr_it == relation.end()) {
- throw TextScanFormatError("Row has too many fields");
+ for (const auto &attr : relation) {
+ if (has_reached_end_of_line) {
+ throw TextScanFormatError("Row has too few fields");
}
value_str.clear();
- if (extractFieldString(row_string, &pos, &value_str)) {
- attribute_values.emplace_back();
- if (!attr_it->getType().parseValueFromString(value_str, &(attribute_values.back()))) {
- throw TextScanFormatError("Failed to parse value");
- }
- } else {
+ extractFieldString(row_ptr,
+ &is_null_literal,
+ &has_reached_end_of_line,
+ &value_str);
+
+ if (is_null_literal) {
// NULL literal.
- if (!attr_it->getType().isNullable()) {
+ if (!attr.getType().isNullable()) {
throw TextScanFormatError(
"NULL literal '\\N' was specified for a column with a "
"non-nullable Type");
}
-
- attribute_values.emplace_back(attr_it->getType().makeNullValue());
+ attribute_values.emplace_back(attr.getType().makeNullValue());
+ } else {
+ attribute_values.emplace_back();
+ if (!attr.getType().parseValueFromString(value_str, &(attribute_values.back()))) {
+ throw TextScanFormatError("Failed to parse value");
+ }
}
-
- ++attr_it;
}
- if (attr_it != relation.end()) {
- throw TextScanFormatError("Row has too few fields");
+ if (!has_reached_end_of_line) {
+ throw TextScanFormatError("Row has too many fields");
}
return Tuple(std::move(attribute_values));
}
-void TextSplitWorkOrder::execute() {
- std::FILE *file = std::fopen(filename_.c_str(), "r");
- if (!file) {
- throw TextScanReadError(filename_);
- }
-
- bool eof = false;
- do {
- // Allocate new blob, if current is empty.
- if (0 == remainingBlobBytes()) {
- allocateBlob();
- }
-
- // Read the into the unwritten part of blob.
- std::size_t bytes =
- std::fread(writeableBlobAddress(), 1, remainingBlobBytes(), file);
- eof = bytes < remainingBlobBytes();
- written_ += bytes;
-
- // Write the current blob to queue for processing.
- sendBlobInfoToOperator(!eof /* write_row_aligned */);
- } while (!eof);
-
- std::fclose(file);
+void TextScanWorkOrder::extractFieldString(const char **field_ptr,
+ bool *is_null_literal,
+ bool *has_reached_end_of_line,
+ std::string *field_string) const {
+ const char *cur_ptr = *field_ptr;
+ *is_null_literal = false;
- // Notify the operator about the completion of this Work Order.
- FeedbackMessage msg(TextScanOperator::kSplitWorkOrderCompletionMessage,
- operator_index_,
- nullptr /* payload */,
- 0 /* payload_size */,
- false /* ownership */);
- SendFeedbackMessage(bus_, ClientIDMap::Instance()->getValue(), scheduler_client_id_, msg);
-}
+ // Check for NULL literal string.
+ if (process_escape_sequences_ && cur_ptr[0] == '\\' && cur_ptr[1] == 'N') {
+ cur_ptr += 2;
-// Allocate new blob.
-void TextSplitWorkOrder::allocateBlob() {
- text_blob_id_ = storage_manager_->createBlob(FLAGS_textscan_split_blob_size);
- text_blob_ = storage_manager_->getBlobMutable(text_blob_id_);
- blob_size_ = text_blob_->size();
- written_ = 0;
-}
+ // Skip '\r'
+ if (*cur_ptr == '\r') {
+ ++cur_ptr;
+ }
-// Find the last row terminator in the blob.
-std::size_t TextSplitWorkOrder::findLastRowTerminator() {
- std::size_t found = 0;
- const char *blob = static_cast<const char *>(text_blob_->getMemory());
-
- for (std::size_t index = written_;
- index != 0;
- --index) {
- if (DetectRowTerminator(blob, index, process_escape_sequences_)) {
- found = index;
- break;
+ const char c = *cur_ptr;
+ if (c == field_terminator_ || c == '\n') {
+ *is_null_literal = true;
+ *has_reached_end_of_line = (c == '\n');
+ *field_ptr = cur_ptr + 1;
+ return;
}
}
- // TODO(quickstep-team): Design a way to handle long rows that are larger than
- // the configured blob size.
- CHECK_NE(0u, found) << "No row terminator found in " << FLAGS_textscan_split_blob_size
- << "-slot chunk of " << filename_;
- return found;
-}
+ // Not a NULL literal string, rewind cur_ptr to the start position for parsing.
+ cur_ptr = *field_ptr;
-void TextSplitWorkOrder::sendBlobInfoToOperator(const bool write_row_aligned) {
- std::size_t text_len = written_;
- std::string residue;
- if (write_row_aligned) {
- // Find last row terminator in current blob.
- text_len = findLastRowTerminator();
-
- // Copy the residual bytes after the last row terminator.
- residue = std::string(
- static_cast<char *>(text_blob_->getMemoryMutable()) + text_len,
- written_ - text_len);
- }
+ if (!process_escape_sequences_) {
+ // Simply copy until field_terminator or '\n'.
+ for (;; ++cur_ptr) {
+ const char c = *cur_ptr;
+ if (c == field_terminator_) {
+ *has_reached_end_of_line = false;
+ break;
+ } else if (c == '\n') {
+ *has_reached_end_of_line = true;
+ break;
+ }
- // Notify the operator for the split-up blob.
- serialization::TextBlob proto;
- proto.set_blob_id(text_blob_id_);
- proto.set_size(text_len);
-
- const std::size_t payload_size = proto.ByteSize();
- // NOTE(zuyu): 'payload' gets released by FeedbackMessage's destructor.
- char *payload = static_cast<char *>(std::malloc(payload_size));
- CHECK(proto.SerializeToArray(payload, payload_size));
-
- const tmb::client_id worker_thread_client_id = ClientIDMap::Instance()->getValue();
- FeedbackMessage feedback_msg(TextScanOperator::kNewTextBlobMessage,
- operator_index_,
- payload,
- payload_size);
- SendFeedbackMessage(bus_, worker_thread_client_id, scheduler_client_id_, feedback_msg);
-
- // Notify Foreman for the avaiable work order on the blob.
- serialization::WorkOrdersAvailableMessage message_proto;
- message_proto.set_operator_index(operator_index_);
-
- // NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
- const size_t message_proto_length = message_proto.ByteSize();
- char *message_proto_bytes = static_cast<char*>(std::malloc(message_proto_length));
- CHECK(message_proto.SerializeToArray(message_proto_bytes, message_proto_length));
-
- tmb::TaggedMessage tagged_message(static_cast<const void *>(message_proto_bytes),
- message_proto_length,
- kWorkOrdersAvailableMessage);
- std::free(message_proto_bytes);
-
- // Send new work order available message to Foreman.
- const tmb::MessageBus::SendStatus send_status =
- QueryExecutionUtil::SendTMBMessage(
- bus_,
- worker_thread_client_id,
- scheduler_client_id_,
- std::move(tagged_message));
- CHECK(send_status == tmb::MessageBus::SendStatus::kOK) << "Message could not "
- "be sent from thread with TMB client ID "
- << worker_thread_client_id << " to Foreman with TMB client "
- "ID " << scheduler_client_id_;
-
- if (residue.size()) {
- // Allocate new blob, and copy residual bytes from last blob.
- allocateBlob();
- std::memcpy(writeableBlobAddress(), residue.data(), residue.size());
- written_ += residue.size();
+ // Ignore '\r'
+ if (c != '\r') {
+ field_string->push_back(c);
+ }
+ }
+ } else {
+ for (;; ++cur_ptr) {
+ const char c = *cur_ptr;
+ if (c == '\\') {
+ ++cur_ptr;
+ const char first_escaped_character = *cur_ptr;
+ switch (first_escaped_character) {
+ case '0': // Fallthrough for octal digits.
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ field_string->push_back(ParseOctalLiteral(&cur_ptr));
+ break;
+ case 'N': {
+ // Null literal after some other column data.
+ throw TextScanFormatError(
+ "Null indicator '\\N' encountered in text scan mixed in with "
+ "other column data.");
+ }
+ case '\\':
+ // Backslash.
+ field_string->push_back('\\');
+ break;
+ case 'b':
+ // Backspace.
+ field_string->push_back('\b');
+ break;
+ case 'f':
+ // Form-feed.
+ field_string->push_back('\f');
+ break;
+ case 'n':
+ // Newline.
+ field_string->push_back('\n');
+ break;
+ case 'r':
+ // Carriage return.
+ field_string->push_back('\r');
+ break;
+ case 't':
+ // Tab.
+ field_string->push_back('\t');
+ break;
+ case 'v':
+ // Vertical tab.
+ field_string->push_back('\v');
+ break;
+ case 'x':
+ if (std::isxdigit(cur_ptr[1])) {
+ // Hexadecimal char literal.
+ ++cur_ptr;
+ field_string->push_back(ParseHexLiteral(&cur_ptr));
+ } else {
+ // Just an escaped 'x' with no hex digits.
+ field_string->push_back('x');
+ }
+ break;
+ case '\n':
+ throw TextScanFormatError(
+ "Backslash line splicing is not supported.");
+ default:
+ // Append escaped character as-is.
+ field_string->push_back(first_escaped_character);
+ break;
+ }
+ } else if (c == field_terminator_) {
+ *has_reached_end_of_line = false;
+ break;
+ } else if (c == '\n') {
+ *has_reached_end_of_line = true;
+ break;
+ } else {
+ if (c != '\r') {
+ // Ignore '\r'
+ field_string->push_back(c);
+ }
+ }
+ }
}
+ *field_ptr = cur_ptr + 1;
}
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/relational_operators/TextScanOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TextScanOperator.hpp b/relational_operators/TextScanOperator.hpp
index 3cda65b..d73e7dd 100644
--- a/relational_operators/TextScanOperator.hpp
+++ b/relational_operators/TextScanOperator.hpp
@@ -1,6 +1,8 @@
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin—Madison.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,26 +20,18 @@
#ifndef QUICKSTEP_RELATIONAL_OPERATORS_TEXT_SCAN_OPERATOR_HPP_
#define QUICKSTEP_RELATIONAL_OPERATORS_TEXT_SCAN_OPERATOR_HPP_
-#include <atomic>
+#include <cctype>
#include <cstddef>
-#include <cstdint>
-#include <cstdio>
#include <exception>
#include <string>
#include "catalog/CatalogRelation.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "query_execution/QueryContext.hpp"
-#include "query_execution/QueryExecutionTypedefs.hpp"
#include "relational_operators/RelationalOperator.hpp"
#include "relational_operators/WorkOrder.hpp"
-#include "storage/StorageBlob.hpp"
-#include "storage/StorageBlockInfo.hpp"
#include "types/containers/Tuple.hpp"
#include "utility/Macros.hpp"
-#include "utility/ThreadSafeQueue.hpp"
-
-#include "glog/logging.h"
#include "tmb/id_typedefs.h"
@@ -98,26 +92,11 @@ class TextScanFormatError : public std::exception {
};
/**
- * @brief A structure for text data blobs.
- */
-struct TextBlob {
- TextBlob(const block_id text_blob_id, const std::size_t text_size)
- : blob_id(text_blob_id), size(text_size) {}
- block_id blob_id;
- std::size_t size;
-};
-
-/**
* @brief An operator which reads tuples from a text file and inserts them into
* a relation.
**/
class TextScanOperator : public RelationalOperator {
public:
- enum FeedbackMessageType : WorkOrder::FeedbackMessageType {
- kNewTextBlobMessage,
- kSplitWorkOrderCompletionMessage,
- };
-
/**
* @brief Constructor
*
@@ -130,29 +109,22 @@ class TextScanOperator : public RelationalOperator {
* the text file.
* @param process_escape_sequences Whether to decode escape sequences in the
* text file.
- * @param parallelize_load Parallelize the load process by th spliting file
- * into blobs, and generating separate work-orders for each of them.
* @param output_relation The output relation.
* @param output_destination_index The index of the InsertDestination in the
* QueryContext to insert tuples.
**/
- TextScanOperator(
- const std::size_t query_id,
- const std::string &file_pattern,
- const char field_terminator,
- const bool process_escape_sequences,
- const bool parallelize_load,
- const CatalogRelation &output_relation,
- const QueryContext::insert_destination_id output_destination_index)
+ TextScanOperator(const std::size_t query_id,
+ const std::string &file_pattern,
+ const char field_terminator,
+ const bool process_escape_sequences,
+ const CatalogRelation &output_relation,
+ const QueryContext::insert_destination_id output_destination_index)
: RelationalOperator(query_id),
file_pattern_(file_pattern),
field_terminator_(field_terminator),
process_escape_sequences_(process_escape_sequences),
- parallelize_load_(parallelize_load),
output_relation_(output_relation),
output_destination_index_(output_destination_index),
- num_done_split_work_orders_(0),
- num_split_work_orders_(0),
work_generated_(false) {}
~TextScanOperator() override {}
@@ -171,23 +143,14 @@ class TextScanOperator : public RelationalOperator {
return output_relation_.getID();
}
- void receiveFeedbackMessage(const WorkOrder::FeedbackMessage &msg) override;
-
private:
const std::string file_pattern_;
const char field_terminator_;
const bool process_escape_sequences_;
- const bool parallelize_load_;
const CatalogRelation &output_relation_;
const QueryContext::insert_destination_id output_destination_index_;
- ThreadSafeQueue<TextBlob> text_blob_queue_;
- std::atomic<std::uint32_t> num_done_split_work_orders_;
- std::uint32_t num_split_work_orders_;
-
- // Indicates if work order to load file is generated for non-parallel load, and
- // if work order to split file to blobs is generated for parallel load.
bool work_generated_;
DISALLOW_COPY_AND_ASSIGN(TextScanOperator);
@@ -203,7 +166,9 @@ class TextScanWorkOrder : public WorkOrder {
*
* @param query_id The ID of the query to which this WorkOrder belongs.
* @param filename The name of the text file to bulk insert.
- * @param field_terminator The string which separates attribute values in
+ * @param text_offset The start position in the text file to start text scan.
+ * @param text_segment_size The size of text segment to be scanned.
+ * @param field_terminator The character which separates attribute values in
* the text file.
* @param process_escape_sequences Whether to decode escape sequences in the
* text file.
@@ -213,28 +178,8 @@ class TextScanWorkOrder : public WorkOrder {
TextScanWorkOrder(
const std::size_t query_id,
const std::string &filename,
- const char field_terminator,
- const bool process_escape_sequences,
- InsertDestination *output_destination,
- StorageManager *storage_manager);
-
- /**
- * @brief Constructor.
- *
- * @param query_id The ID of the query to which this WorkOrder belongs.
- * @param text_blob Blob ID containing the data to be scanned.
- * @param text_size Size of the data in the blob.
- * @param field_terminator The character which separates attribute values in
- * the text file.
- * @param process_escape_sequences Whether to decode escape sequences in the
- * text file.
- * @param output_destination The InsertDestination to write the read tuples.
- * @param storage_manager The StorageManager to use.
- */
- TextScanWorkOrder(
- const std::size_t query_id,
- const block_id text_blob,
- const std::size_t text_size,
+ const std::size_t text_offset,
+ const std::size_t text_segment_size,
const char field_terminator,
const bool process_escape_sequences,
InsertDestination *output_destination,
@@ -255,141 +200,106 @@ class TextScanWorkOrder : public WorkOrder {
void execute() override;
private:
- // Parse up to three octal digits (0-7) starting at '*start_pos' in
- // 'row_string' as a char literal. '*start_pos' will be modified to
- // the first position AFTER the parsed octal digits.
- static char ParseOctalLiteral(const std::string &row_string,
- std::size_t *start_pos);
-
- // Parse up to two hexadecimal digits (0-F, case insensitive) starting at
- // '*start_pos' in 'row_string' as a char literal. '*start_pos' will be
- // modified to the first position AFTER the parsed hexadecimal digits.
- static char ParseHexLiteral(const std::string &row_string,
- std::size_t *start_pos);
-
- // Read the next text row from the open FILE stream '*file' into
- // '*row_string'. Returns false if end-of-file is reached and there are no
- // more rows, true if a row string was successfully read. For ease of
- // parsing, '*row_string' has the trailing row-terminator removed and
- // replaced with a field-terminator.
- bool readRowFromFile(FILE *file, std::string *row_string) const;
-
- // Read the next text from blob memory starting at '**start_pos' and ending
- // at '*end_pos' into '*row_string'. Returns false if the end of the blob is
- // reached and there are no more rows, true if a row was successfully read.
- // For ease of parsing, '*row_string' has the trailing row-terminator removed
- // and replaced with a field-terminator. After call '*start_pos' points to
- // first character AFTER the read row in the blob.
- bool readRowFromBlob(const char **start_pos,
- const char *end_pos,
- std::string *row_string) const;
-
- // Trim a row-terminator (newline or carriage-return + newline) off the end
- // of '*row_string'. Returns true if the row-terminator was successfully
- // removed, false if '*row_string' did not end in a row-terminator.
- bool removeRowTerminator(std::string *row_string) const;
-
- // Extract a field string starting at '*start_pos' in 'row_string' into
- // '*field_string'. This method also expands escape sequences if
- // 'process_escape_sequences_' is true. Returns true if a field string was
- // successfully extracted, false in the special case where the NULL-literal
- // string "\N" was found. Throws TextScanFormatError if text was malformed.
- bool extractFieldString(const std::string &row_string,
- std::size_t *start_pos,
- std::string *field_string) const;
-
- // Make a tuple by parsing all of the individual fields specified in
- // 'row_string'.
- Tuple parseRow(const std::string &row_string, const CatalogRelationSchema &relation) const;
-
- const bool is_file_;
- const std::string filename_;
- const char field_terminator_;
- const block_id text_blob_;
- const std::size_t text_size_;
- const bool process_escape_sequences_;
-
- InsertDestination *output_destination_;
- StorageManager *storage_manager_;
-
- DISALLOW_COPY_AND_ASSIGN(TextScanWorkOrder);
-};
-
-/**
- * @brief A WorkOrder to split the file into blobs of text that can be processed
- * separately.
- **/
-class TextSplitWorkOrder : public WorkOrder {
- public:
/**
- * @brief Constructor.
+ * @brief Extract a field string starting at \p *field_ptr. This method also
+ * expands escape sequences if \p process_escape_sequences_ is true.
+ * Throws TextScanFormatError if text was malformed.
*
- * @param query_id The ID of the query to which this WorkOrder belongs.
- * @param filename File to split into row-aligned blobs.
- * @param process_escape_sequences Whether to decode escape sequences in the
- * text file.
- * @param storage_manager The StorageManager to use.
- * @param operator_index Operator index of the current operator. This is used
- * to send new-work available message to Foreman.
- * @param scheduler_client_id The TMB client ID of the scheduler thread.
- * @param bus A pointer to the TMB.
+ * @param field_ptr \p *field_ptr points to the current position of the input
+ * char stream for parsing. The overall char stream must end with a
+ * newline character. After the call, \p *field_ptr will be modified to
+ * the start position of the NEXT field string.
+ * @param is_null_literal OUTPUT parameter. Set to true if the NULL-literal
+ * string "\N" was found.
+ * @param has_reached_end_of_line OUTPUT parameter. Set to true if the newline
+ * character was encountered.
+ * @param field_string OUTPUT parameter. Set to the extracted field string.
*/
- TextSplitWorkOrder(const std::size_t query_id,
- const std::string &filename,
- const bool process_escape_sequences,
- StorageManager *storage_manager,
- const std::size_t operator_index,
- const tmb::client_id scheduler_client_id,
- MessageBus *bus)
- : WorkOrder(query_id),
- filename_(filename),
- process_escape_sequences_(process_escape_sequences),
- storage_manager_(DCHECK_NOTNULL(storage_manager)),
- operator_index_(operator_index),
- scheduler_client_id_(scheduler_client_id),
- bus_(DCHECK_NOTNULL(bus)) {}
+ void extractFieldString(const char **field_ptr,
+ bool *is_null_literal,
+ bool *has_reached_end_of_line,
+ std::string *field_string) const;
/**
- * @exception TextScanReadError The text file could not be opened for
- * reading.
+ * @brief Make a tuple by parsing all of the individual fields from a char stream.
+ *
+ * @param row_ptr \p *row_ptr points to the current position of the input char stream
+ * for parsing. The overall char stream must end with a newline character.
+ * After the call, \p *row_ptr will be modified to the start position of
+ * the NEXT text row.
+ * @param relation The relation schema for the tuple.
+ * @return The tuple parsed from the char stream.
*/
- void execute() override;
-
- private:
- // Allocate a new blob.
- void allocateBlob();
-
- // Find the last row terminator in current blob.
- std::size_t findLastRowTerminator();
+ Tuple parseRow(const char **row_ptr,
+ const CatalogRelationSchema &relation) const;
- // Send the blob info to its operator via TMB.
- void sendBlobInfoToOperator(const bool write_row_aligned);
- // Get the writeable address (unwritten chunk) in current blob.
- inline char* writeableBlobAddress() {
- return static_cast<char*>(text_blob_->getMemoryMutable()) + written_;
+ /**
+ * @brief Parse up to three octal digits (0-7) starting at \p *literal_ptr as
+ * a char literal. \p *literal_ptr will be modified to the last position
+ * of the parsed octal digits.
+ *
+ * @param literal_ptr \p *literal_ptr points to the current position of the
+ * input char stream for parsing. The overall char stream must end with
+ * a newline character.
+ * @return The char literal from the parsed octal digits.
+ */
+ inline static char ParseOctalLiteral(const char **literal_ptr) {
+ int value = 0;
+ const char *ptr = *literal_ptr;
+ for (int i = 0; i < 3; ++i, ++ptr) {
+ const int char_value = *ptr - '0';
+ if ((char_value >= 0) && (char_value < 8)) {
+ value = value * 8 + char_value;
+ } else {
+ break;
+ }
+ }
+ *literal_ptr = ptr - 1;
+ return value;
}
- // Number of bytes remaining to be written.
- inline std::size_t remainingBlobBytes() const {
- return blob_size_ - written_;
+ /**
+ * @brief Parse up to two hexadecimal digits (0-F, case insensitive) starting
+ * at \p *literal_ptr as a char literal. \p *literal_ptr will be modified
+ * to the last position of the parsed hexadecimal digits.
+ *
+ * @param literal_ptr \p *literal_ptr points to the current position of the
+ * input char stream for parsing. The overall char stream must end with
+ * a newline character.
+ * @return The char literal from the parsed hexadecimal digits.
+ */
+ inline static char ParseHexLiteral(const char **literal_ptr) {
+ int value = 0;
+ const char *ptr = *literal_ptr;
+ for (int i = 0; i < 2; ++i, ++ptr) {
+ const char c = *ptr;
+ int char_value;
+ if (std::isdigit(c)) {
+ char_value = c - '0';
+ } else if (c >= 'a' && c <= 'f') {
+ char_value = c - 'a' + 10;
+ } else if (c >= 'A' && c <= 'F') {
+ char_value = c - 'A' + 10;
+ } else {
+ break;
+ }
+ value = value * 16 + char_value;
+ }
+ *literal_ptr = ptr - 1;
+ return value;
}
- const std::string filename_; // File to split.
+ const std::string filename_;
+ const std::size_t text_offset_;
+ const std::size_t text_segment_size_;
+ const char field_terminator_;
const bool process_escape_sequences_;
+ InsertDestination *output_destination_;
StorageManager *storage_manager_;
- const std::size_t operator_index_; // Opeartor index.
- const tmb::client_id scheduler_client_id_; // The scheduler's TMB client ID.
- MessageBus *bus_;
-
- MutableBlobReference text_blob_; // Mutable reference to current blob.
- block_id text_blob_id_; // Current blob ID.
- std::size_t written_ = 0; // Bytes written in current blob.
- std::size_t blob_size_ = 0; // Size of the current blob.
-
- DISALLOW_COPY_AND_ASSIGN(TextSplitWorkOrder);
+ DISALLOW_COPY_AND_ASSIGN(TextScanWorkOrder);
};
/** @} */
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index fd731f7..60d4c8f 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -1,5 +1,7 @@
// Copyright 2011-2015 Quickstep Technologies LLC.
// Copyright 2015-2016 Pivotal Software, Inc.
+// Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+// University of Wisconsin—Madison.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,7 +20,6 @@ syntax = "proto2";
package quickstep.serialization;
import "relational_operators/SortMergeRunOperator.proto";
-import "relational_operators/TextScanOperator.proto";
enum WorkOrderType {
AGGREGATION = 1;
@@ -39,8 +40,7 @@ enum WorkOrderType {
SORT_RUN_GENERATION = 16;
TABLE_GENERATOR = 17;
TEXT_SCAN = 18;
- TEXT_SPLIT = 19;
- UPDATE = 20;
+ UPDATE = 19;
}
message WorkOrder {
@@ -223,15 +223,12 @@ message TableGeneratorWorkOrder {
message TextScanWorkOrder {
extend WorkOrder {
// All required.
+ optional string filename = 301;
+ optional uint64 text_offset = 302;
+ optional uint64 text_segment_size = 303;
optional uint32 field_terminator = 304; // For one-byte char.
optional bool process_escape_sequences = 305;
optional int32 insert_destination_index = 306;
-
- // Either
- optional string filename = 307;
-
- // Or
- optional TextBlob text_blob = 308;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/relational_operators/WorkOrderFactory.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index 489b666..da42b4d 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -42,7 +42,6 @@
#include "relational_operators/SortRunGenerationOperator.hpp"
#include "relational_operators/TableGeneratorOperator.hpp"
#include "relational_operators/TextScanOperator.hpp"
-#include "relational_operators/TextScanOperator.pb.h"
#include "relational_operators/UpdateOperator.hpp"
#include "relational_operators/WorkOrder.pb.h"
#include "storage/StorageBlockInfo.hpp"
@@ -389,40 +388,16 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
}
case serialization::TEXT_SCAN: {
LOG(INFO) << "Creating TextScanWorkOrder";
- if (proto.HasExtension(serialization::TextScanWorkOrder::filename)) {
- return new TextScanWorkOrder(
- proto.query_id(),
- proto.GetExtension(serialization::TextScanWorkOrder::filename),
- proto.GetExtension(serialization::TextScanWorkOrder::field_terminator),
- proto.GetExtension(serialization::TextScanWorkOrder::process_escape_sequences),
- query_context->getInsertDestination(
- proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)),
- storage_manager);
- } else {
- const serialization::TextBlob &text_blob_proto =
- proto.GetExtension(serialization::TextScanWorkOrder::text_blob);
-
- return new TextScanWorkOrder(
- proto.query_id(),
- text_blob_proto.blob_id(),
- text_blob_proto.size(),
- proto.GetExtension(serialization::TextScanWorkOrder::field_terminator),
- proto.GetExtension(serialization::TextScanWorkOrder::process_escape_sequences),
- query_context->getInsertDestination(
- proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)),
- storage_manager);
- }
- }
- case serialization::TEXT_SPLIT: {
- LOG(INFO) << "Creating TextSplitWorkOrder";
- return new TextSplitWorkOrder(
+ return new TextScanWorkOrder(
proto.query_id(),
- proto.GetExtension(serialization::TextSplitWorkOrder::filename),
- proto.GetExtension(serialization::TextSplitWorkOrder::process_escape_sequences),
- storage_manager,
- proto.GetExtension(serialization::TextSplitWorkOrder::operator_index),
- shiftboss_client_id,
- bus);
+ proto.GetExtension(serialization::TextScanWorkOrder::filename),
+ proto.GetExtension(serialization::TextScanWorkOrder::text_offset),
+ proto.GetExtension(serialization::TextScanWorkOrder::text_segment_size),
+ proto.GetExtension(serialization::TextScanWorkOrder::field_terminator),
+ proto.GetExtension(serialization::TextScanWorkOrder::process_escape_sequences),
+ query_context->getInsertDestination(
+ proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index)),
+ storage_manager);
}
case serialization::UPDATE: {
LOG(INFO) << "Creating UpdateWorkOrder";
@@ -691,27 +666,14 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto,
proto.GetExtension(serialization::TableGeneratorWorkOrder::insert_destination_index));
}
case serialization::TEXT_SCAN: {
- if (!proto.HasExtension(serialization::TextScanWorkOrder::field_terminator) ||
- !proto.HasExtension(serialization::TextScanWorkOrder::process_escape_sequences) ||
- !proto.HasExtension(serialization::TextScanWorkOrder::insert_destination_index) ||
- !query_context.isValidInsertDestinationId(
- proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index))) {
- return false;
- }
-
- // Two fields are exclusive.
- if (proto.HasExtension(serialization::TextScanWorkOrder::filename) ==
- proto.HasExtension(serialization::TextScanWorkOrder::text_blob)) {
- return false;
- }
-
- return proto.HasExtension(serialization::TextScanWorkOrder::filename) ||
- proto.GetExtension(serialization::TextScanWorkOrder::text_blob).IsInitialized();
- }
- case serialization::TEXT_SPLIT: {
- return proto.HasExtension(serialization::TextSplitWorkOrder::filename) &&
- proto.HasExtension(serialization::TextSplitWorkOrder::process_escape_sequences) &&
- proto.HasExtension(serialization::TextSplitWorkOrder::operator_index);
+ return proto.HasExtension(serialization::TextScanWorkOrder::filename) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::text_offset) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::text_segment_size) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::field_terminator) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::process_escape_sequences) &&
+ proto.HasExtension(serialization::TextScanWorkOrder::insert_destination_index) &&
+ query_context.isValidInsertDestinationId(
+ proto.GetExtension(serialization::TextScanWorkOrder::insert_destination_index));
}
case serialization::UPDATE: {
return proto.HasExtension(serialization::UpdateWorkOrder::relation_id) &&
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/relational_operators/tests/TextScanOperator_unittest.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/tests/TextScanOperator_unittest.cpp b/relational_operators/tests/TextScanOperator_unittest.cpp
index ef6fc2d..5860745 100644
--- a/relational_operators/tests/TextScanOperator_unittest.cpp
+++ b/relational_operators/tests/TextScanOperator_unittest.cpp
@@ -193,7 +193,6 @@ TEST_F(TextScanOperatorTest, ScanTest) {
input_filename,
'\t',
true,
- false,
*relation_,
output_destination_index));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/55b06fab/relational_operators/tests/text_scan_input.txt
----------------------------------------------------------------------
diff --git a/relational_operators/tests/text_scan_input.txt b/relational_operators/tests/text_scan_input.txt
index bcb76bf..51015bd 100644
--- a/relational_operators/tests/text_scan_input.txt
+++ b/relational_operators/tests/text_scan_input.txt
@@ -2,9 +2,5 @@
-1234567890 -1.2e-200 A twenty char string 1969-07-21 02:56:00 00:00:01.001 Another twenty chars
\N \N \N \N \N \N
\N \N \\N \N \N \\N
-\x34\062 \55\064\x32\56\65 \x7B\
-\t\ \\\e\s\c\a\p\e\d\x\b\n\x7d 1988-07-16\T00:00\:00\x2E0\x30\60\06001 00:00:00 'good\' \"bye"\r\n\
-\r\n\v\n\
-
-0 0.0 \\\\\
-\\\\\n 1970-01-01 0 s \\\\
+\x34\062 \55\064\x32\56\65 \x7B\n\t\ \\\e\s\c\a\p\e\d\x\b\n\x7d 1988-07-16\T00:00\:00\x2E0\x30\60\06001 00:00:00 'good\' \"bye"\r\n\n\r\n\v\n\n
+0 0.0 \\\\\n\\\\\n 1970-01-01 0 s \\\\
[02/12] incubator-quickstep git commit: Fix Clang problems in Travis.
Posted by hb...@apache.org.
Fix Clang problems in Travis.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/2d39b8ec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/2d39b8ec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/2d39b8ec
Branch: refs/heads/query-manager-used-in-foreman
Commit: 2d39b8ecd8b1ca3fb42ff2505a664a94b67ab9e3
Parents: eab1c9a
Author: Navneet Potti <na...@gmail.com>
Authored: Wed Jun 8 18:15:38 2016 -0500
Committer: Zuyu Zhang <zz...@pivotal.io>
Committed: Wed Jun 8 20:27:44 2016 -0700
----------------------------------------------------------------------
.travis.yml | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d39b8ec/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 08d6f38..df39fb0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ cache: ccache
compiler:
- gcc
- # clang
+ - clang
env:
- BUILD_TYPE=Debug VECTOR_COPY_ELISION_LEVEL=joinwithbinaryexpressions
@@ -20,6 +20,19 @@ env:
- BUILD_TYPE=Debug VECTOR_COPY_ELISION_LEVEL=none
- BUILD_TYPE=Release VECTOR_COPY_ELISION_LEVEL=none
+before_install:
+ - LLVM_VERSION=3.7.1
+ - LLVM_ARCHIVE_PATH=$HOME/clang+llvm.tar.xz
+ - if [[ $CC = "clang" ]]; then
+ wget http://llvm.org/releases/$LLVM_VERSION/clang+llvm-$LLVM_VERSION-x86_64-linux-gnu-ubuntu-14.04.tar.xz -O $LLVM_ARCHIVE_PATH;
+ mkdir -p $HOME/clang-$LLVM_VERSION;
+ tar xf $LLVM_ARCHIVE_PATH -C $HOME/clang-$LLVM_VERSION --strip-components 1;
+ ln -sf $HOME/clang-$LLVM_VERSION/bin/clang++ $HOME/clang-$LLVM_VERSION/bin/clang++-3.7;
+ export PATH=$HOME/clang-$LLVM_VERSION/bin:$PATH;
+ export CPPFLAGS="-I $HOME/clang-$LLVM_VERSION/include/c++/v1";
+ echo "Using clang at " `which $CC-3.7` " and $CXX at " `which $CXX-3.7`;
+ fi
+
install:
- if [ "$VECTOR_COPY_ELISION_LEVEL" = "joinwithbinaryexpressions" ] && [ "$CC" = "gcc" ]; then
export MAKE_JOBS=1;
[11/12] incubator-quickstep git commit: Long lived Foreman thread
Posted by hb...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/PolicyEnforcer.cpp
----------------------------------------------------------------------
diff --git a/query_execution/PolicyEnforcer.cpp b/query_execution/PolicyEnforcer.cpp
new file mode 100644
index 0000000..1ee1df9
--- /dev/null
+++ b/query_execution/PolicyEnforcer.cpp
@@ -0,0 +1,177 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "query_execution/PolicyEnforcer.hpp"
+
+#include <cstddef>
+#include <memory>
+#include <queue>
+#include <utility>
+#include <unordered_map>
+#include <vector>
+
+#include "query_execution/QueryExecutionMessages.pb.h"
+#include "query_execution/QueryManager.hpp"
+#include "query_optimizer/QueryHandle.hpp"
+#include "relational_operators/WorkOrder.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+bool PolicyEnforcer::admitQuery(QueryHandle *query_handle) {
+ if (admitted_queries_.size() < kMaxConcurrentQueries) {
+ // Ok to admit the query.
+ const std::size_t query_id = query_handle->query_id();
+ if (admitted_queries_.find(query_id) == admitted_queries_.end()) {
+ admitted_queries_[query_id].reset(
+ new QueryManager(foreman_client_id_, num_numa_nodes_, query_handle,
+ catalog_database_, storage_manager_, bus_));
+ return true;
+ } else {
+ LOG(ERROR) << "Query with the same ID " << query_id << " exists";
+ return false;
+ }
+ } else {
+ // This query will have to wait.
+ waiting_queries_.push(query_handle);
+ return false;
+ }
+}
+
+void PolicyEnforcer::processMessage(const TaggedMessage &tagged_message) {
+ // TODO(harshad) : Provide processXMessage() public functions in
+ // QueryManager, so that we need to extract message from the
+ // TaggedMessage only once.
+ std::size_t query_id;
+ switch (tagged_message.message_type()) {
+ case kWorkOrderCompleteMessage: // Fall through.
+ case kRebuildWorkOrderCompleteMessage: {
+ serialization::WorkOrderCompletionMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(),
+ tagged_message.message_bytes()));
+ query_id = proto.query_id();
+ break;
+ }
+ case kCatalogRelationNewBlockMessage: {
+ serialization::CatalogRelationNewBlockMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(),
+ tagged_message.message_bytes()));
+ query_id = proto.query_id();
+ break;
+ }
+ case kDataPipelineMessage: {
+ serialization::DataPipelineMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(),
+ tagged_message.message_bytes()));
+ query_id = proto.query_id();
+ break;
+ }
+ case kWorkOrdersAvailableMessage: {
+ serialization::WorkOrdersAvailableMessage proto;
+ CHECK(proto.ParseFromArray(tagged_message.message(),
+ tagged_message.message_bytes()));
+ query_id = proto.query_id();
+ break;
+ }
+ case kWorkOrderFeedbackMessage: {
+ // TODO(harshad) Add query ID to FeedbackMessage.
+ WorkOrder::FeedbackMessage msg(const_cast<void *>(tagged_message.message()), tagged_message.message_bytes());
+ query_id = msg.header().query_id;
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unknown message type found in PolicyEnforcer";
+ }
+ DCHECK(admitted_queries_.find(query_id) != admitted_queries_.end());
+ const QueryManager::QueryStatusCode return_code =
+ admitted_queries_[query_id]->processMessage(tagged_message);
+ if (return_code == QueryManager::QueryStatusCode::kQueryExecuted) {
+ removeQuery(query_id);
+ if (!waiting_queries_.empty()) {
+ // Admit the earliest waiting query.
+ QueryHandle *new_query = waiting_queries_.front();
+ waiting_queries_.pop();
+ admitQuery(new_query);
+ }
+ }
+}
+
+void PolicyEnforcer::getWorkerMessages(
+ std::vector<std::unique_ptr<WorkerMessage>> *worker_messages) {
+ // Iterate over admitted queries until either there are no more
+ // messages available, or the maximum number of messages have
+ // been collected.
+ DCHECK(worker_messages->empty());
+ // TODO(harshad) - Make this function generic enough so that it
+ // works well when multiple queries are getting executed.
+ std::size_t per_query_share = 0;
+ if (!admitted_queries_.empty()) {
+ per_query_share = kMaxNumWorkerMessages / admitted_queries_.size();
+ } else {
+ LOG(WARNING) << "Requesting WorkerMessages when no query is running";
+ return;
+ }
+ DCHECK_GT(per_query_share, 0u);
+ std::vector<std::size_t> finished_queries_ids;
+
+ for (const auto &admitted_query_info : admitted_queries_) {
+ QueryManager *curr_query_manager = admitted_query_info.second.get();
+ DCHECK(curr_query_manager != nullptr);
+ std::size_t messages_collected_curr_query = 0;
+ while (messages_collected_curr_query < per_query_share) {
+ WorkerMessage *next_worker_message =
+ curr_query_manager->getNextWorkerMessage(0, -1);
+ if (next_worker_message != nullptr) {
+ ++messages_collected_curr_query;
+ worker_messages->push_back(std::unique_ptr<WorkerMessage>(next_worker_message));
+ } else {
+ // No more work orders from the current query at this time.
+ // Check if the query's execution is over.
+ if (curr_query_manager->getQueryExecutionState().hasQueryExecutionFinished()) {
+ // If the query has been executed, remove it.
+ finished_queries_ids.push_back(admitted_query_info.first);
+ }
+ break;
+ }
+ }
+ }
+ for (std::size_t finished_qid : finished_queries_ids) {
+ removeQuery(finished_qid);
+ }
+}
+
+void PolicyEnforcer::removeQuery(const std::size_t query_id) {
+ DCHECK(admitted_queries_.find(query_id) != admitted_queries_.end());
+ if (!admitted_queries_[query_id]->getQueryExecutionState().hasQueryExecutionFinished()) {
+ LOG(WARNING) << "Removing query with ID " << query_id
+ << " that hasn't finished its execution";
+ }
+ admitted_queries_.erase(query_id);
+}
+
+bool PolicyEnforcer::admitQueries(
+    const std::vector<QueryHandle *> &query_handles) {
+  // Offer every handle to admitQuery() so later ones are queued, not dropped.
+  bool all_admitted = true;
+  for (QueryHandle *curr_query : query_handles) {
+    all_admitted = admitQuery(curr_query) && all_admitted;
+  }
+  return all_admitted;
+}
+
+} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/PolicyEnforcer.hpp
----------------------------------------------------------------------
diff --git a/query_execution/PolicyEnforcer.hpp b/query_execution/PolicyEnforcer.hpp
new file mode 100644
index 0000000..d4ba643
--- /dev/null
+++ b/query_execution/PolicyEnforcer.hpp
@@ -0,0 +1,168 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_EXECUTION_POLICY_ENFORCER_HPP_
+#define QUICKSTEP_QUERY_EXECUTION_POLICY_ENFORCER_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <queue>
+#include <unordered_map>
+#include <vector>
+
+#include "query_execution/QueryExecutionTypedefs.hpp"
+#include "query_execution/QueryManager.hpp"
+#include "query_execution/WorkerMessage.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+#include "tmb/id_typedefs.h"
+#include "tmb/message_bus.h"
+#include "tmb/tagged_message.h"
+
+namespace quickstep {
+
+class CatalogDatabaseLite;
+class QueryHandle;
+class StorageManager;
+
+/**
+ * @brief A class that ensures that a high level policy is maintained
+ * in sharing resources among concurrent queries.
+ **/
+class PolicyEnforcer {
+ public:
+ /**
+ * @brief Constructor.
+ *
+ * @param foreman_client_id The TMB client ID of the Foreman.
+ * @param num_numa_nodes Number of NUMA nodes used by the system.
+ * @param catalog_database The CatalogDatabase used.
+ * @param storage_manager The StorageManager used.
+ * @param bus The TMB.
+ **/
+ PolicyEnforcer(const tmb::client_id foreman_client_id,
+ const std::size_t num_numa_nodes,
+ CatalogDatabaseLite *catalog_database,
+ StorageManager *storage_manager,
+ tmb::MessageBus *bus)
+ : foreman_client_id_(foreman_client_id),
+ num_numa_nodes_(num_numa_nodes),
+ catalog_database_(catalog_database),
+ storage_manager_(storage_manager),
+ bus_(bus) {}
+
+ /**
+ * @brief Destructor.
+ **/
+ ~PolicyEnforcer() {
+ if (hasQueries()) {
+ LOG(WARNING) << "Destructing PolicyEnforcer with some unfinished or "
+ "waiting queries";
+ }
+ }
+
+ /**
+ * @brief Admit a query to the system.
+ *
+ * @param query_handle The QueryHandle for the new query.
+ *
+ * @return Whether the query was admitted to the system.
+ **/
+ bool admitQuery(QueryHandle *query_handle);
+
+ /**
+ * @brief Admit multiple queries in the system.
+ *
+ * @note In the current simple implementation, we only allow one active
+ * query in the system. Other queries will have to wait.
+ *
+ * @param query_handles A vector of QueryHandles for the queries to be
+ * admitted.
+ *
+ * @return True if all the queries were admitted, false if at least one query
+ * was not admitted.
+ **/
+ bool admitQueries(const std::vector<QueryHandle*> &query_handles);
+
+ /**
+ * @brief Remove a given query that is under execution.
+ *
+ * @note This function is made public so that it is possible for a query
+ * to be killed. Otherwise, it should only be used privately by the
+ * class.
+ *
+ * TODO(harshad) - Extend this function to support removal of waiting queries.
+ *
+ * @param query_id The ID of the query to be removed.
+ **/
+ void removeQuery(const std::size_t query_id);
+
+ /**
+ * @brief Get worker messages to be dispatched. These worker messages come
+ * from the active queries.
+ *
+ * @param worker_messages The worker messages to be dispatched.
+ **/
+ void getWorkerMessages(
+ std::vector<std::unique_ptr<WorkerMessage>> *worker_messages);
+
+ /**
+ * @brief Process a message sent to the Foreman, which gets passed on to the
+ * policy enforcer.
+ *
+ * @param tagged_message The message.
+ **/
+ void processMessage(const TaggedMessage &tagged_message);
+
+ /**
+ * @brief Check if there are any queries to be executed.
+ *
+ * @return True if there is at least one active or waiting query, false if
+ * the policy enforcer doesn't have any query.
+ **/
+ inline bool hasQueries() const {
+ return !(admitted_queries_.empty() && waiting_queries_.empty());
+ }
+
+ private:
+ static constexpr std::size_t kMaxConcurrentQueries = 1;
+ static constexpr std::size_t kMaxNumWorkerMessages = 20;
+
+ const tmb::client_id foreman_client_id_;
+ const std::size_t num_numa_nodes_;
+
+ CatalogDatabaseLite *catalog_database_;
+ StorageManager *storage_manager_;
+
+ tmb::MessageBus *bus_;
+
+ // Key = query ID, value = QueryManager* for the key query.
+ std::unordered_map<std::size_t, std::unique_ptr<QueryManager>> admitted_queries_;
+
+ // The queries which haven't been admitted yet.
+ std::queue<QueryHandle*> waiting_queries_;
+
+ DISALLOW_COPY_AND_ASSIGN(PolicyEnforcer);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_EXECUTION_POLICY_ENFORCER_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/QueryContext.cpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryContext.cpp b/query_execution/QueryContext.cpp
index 3bfce17..5c4c44e 100644
--- a/query_execution/QueryContext.cpp
+++ b/query_execution/QueryContext.cpp
@@ -56,7 +56,8 @@ QueryContext::QueryContext(const serialization::QueryContext &proto,
const CatalogDatabaseLite &database,
StorageManager *storage_manager,
const tmb::client_id scheduler_client_id,
- tmb::MessageBus *bus) {
+ tmb::MessageBus *bus)
+ : query_id_(proto.query_id()) {
DCHECK(ProtoIsValid(proto, database))
<< "Attempted to create QueryContext from an invalid proto description:\n"
<< proto.DebugString();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/QueryContext.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryContext.hpp b/query_execution/QueryContext.hpp
index 7d5628d..83627bc 100644
--- a/query_execution/QueryContext.hpp
+++ b/query_execution/QueryContext.hpp
@@ -460,6 +460,13 @@ class QueryContext {
return update_groups_[id];
}
+ /**
+ * @brief Get the ID of this query.
+ **/
+ inline const std::size_t getQueryID() const {
+ return query_id_;
+ }
+
private:
std::vector<std::unique_ptr<AggregationOperationState>> aggregation_states_;
std::vector<std::unique_ptr<BloomFilter>> bloom_filters_;
@@ -472,6 +479,8 @@ class QueryContext {
std::vector<std::unique_ptr<Tuple>> tuples_;
std::vector<std::unordered_map<attribute_id, std::unique_ptr<const Scalar>>> update_groups_;
+ const std::size_t query_id_;
+
DISALLOW_COPY_AND_ASSIGN(QueryContext);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/QueryContext.proto
----------------------------------------------------------------------
diff --git a/query_execution/QueryContext.proto b/query_execution/QueryContext.proto
index b37286c..98cd0b6 100644
--- a/query_execution/QueryContext.proto
+++ b/query_execution/QueryContext.proto
@@ -54,4 +54,6 @@ message QueryContext {
// NOTE(zuyu): For UpdateWorkOrder only.
repeated UpdateGroup update_groups = 10;
+
+ required uint64 query_id = 11;
}
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/QueryExecutionMessages.proto
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionMessages.proto b/query_execution/QueryExecutionMessages.proto
index 15803cf..9d9a9e5 100644
--- a/query_execution/QueryExecutionMessages.proto
+++ b/query_execution/QueryExecutionMessages.proto
@@ -27,6 +27,7 @@ message EmptyMessage {
message WorkOrderCompletionMessage {
required uint64 operator_index = 1;
required uint64 worker_thread_index = 2;
+ required uint64 query_id = 3;
}
message CatalogRelationNewBlockMessage {
@@ -35,16 +36,19 @@ message CatalogRelationNewBlockMessage {
// Used by PartitionAwareInsertDestination.
optional uint64 partition_id = 3;
+ required uint64 query_id = 4;
}
message DataPipelineMessage {
required uint64 operator_index = 1;
required fixed64 block_id = 2;
required int32 relation_id = 3;
+ required uint64 query_id = 4;
}
message WorkOrdersAvailableMessage {
required uint64 operator_index = 1;
+ required uint64 query_id = 2;
}
// BlockLocator related messages.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/QueryExecutionTypedefs.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionTypedefs.hpp b/query_execution/QueryExecutionTypedefs.hpp
index fc253bc..fa97158 100644
--- a/query_execution/QueryExecutionTypedefs.hpp
+++ b/query_execution/QueryExecutionTypedefs.hpp
@@ -58,6 +58,7 @@ using ClientIDMap = ThreadIDBasedMap<client_id,
// We sort the following message types in the order of a life cycle of a query.
enum QueryExecutionMessageType : message_type_id {
+ kAdmitRequestMessage, // Requesting a query (or queries) to be admitted.
kWorkOrderMessage, // From Foreman to Worker.
kWorkOrderCompleteMessage, // From Worker to Foreman.
kCatalogRelationNewBlockMessage, // From InsertDestination to Foreman.
@@ -67,7 +68,8 @@ enum QueryExecutionMessageType : message_type_id {
// their corresponding RelationalOperators.
kRebuildWorkOrderMessage, // From Foreman to Worker.
kRebuildWorkOrderCompleteMessage, // From Worker to Foreman.
- kPoisonMessage, // From the CLI shell to Foreman, then from Foreman to Workers.
+ kWorkloadCompletionMessage, // From Foreman to main thread.
+ kPoisonMessage, // From the main thread to Foreman and workers.
#ifdef QUICKSTEP_DISTRIBUTED
// BlockLocator related messages, sorted in a life cycle of StorageManager
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/QueryExecutionUtil.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryExecutionUtil.hpp b/query_execution/QueryExecutionUtil.hpp
index a8b6a38..78fd159 100644
--- a/query_execution/QueryExecutionUtil.hpp
+++ b/query_execution/QueryExecutionUtil.hpp
@@ -17,9 +17,12 @@
#ifndef QUICKSTEP_QUERY_EXECUTION_QUERY_EXECUTION_UTIL_HPP_
#define QUICKSTEP_QUERY_EXECUTION_QUERY_EXECUTION_UTIL_HPP_
+#include <memory>
#include <utility>
+#include "query_execution/AdmitRequestMessage.hpp"
#include "query_execution/QueryExecutionTypedefs.hpp"
+#include "query_execution/WorkerMessage.hpp"
#include "utility/Macros.hpp"
#include "tmb/address.h"
@@ -60,6 +63,55 @@ class QueryExecutionUtil {
std::move(tagged_message));
}
+ /**
+ * @brief Construct and send an AdmitRequestMessage from a given sender to a
+ * given recipient.
+ *
+ * @param sender_id The TMB client ID of the sender.
+ * @param receiver_id The TMB client ID of the receiver.
+ * @param query_handle The QueryHandle used in the AdmitRequestMessage.
+ * @param bus A pointer to the TMB.
+ * @param tagged_message A moved from reference to the tagged message.
+ *
+ * @return A status code indicating the result of the message delivery.
+ * The caller should ensure that the status is SendStatus::kOK.
+ **/
+ static tmb::MessageBus::SendStatus ConstructAndSendAdmitRequestMessage(
+ const tmb::client_id sender_id,
+ const tmb::client_id receiver_id,
+ QueryHandle *query_handle,
+ MessageBus *bus) {
+ std::unique_ptr<AdmitRequestMessage> request_message(
+ new AdmitRequestMessage(query_handle));
+ const std::size_t size_of_request_msg = sizeof(*request_message);
+ TaggedMessage admit_tagged_message(
+ request_message.release(), size_of_request_msg, kAdmitRequestMessage);
+
+ return QueryExecutionUtil::SendTMBMessage(
+ bus, sender_id, receiver_id, std::move(admit_tagged_message));
+ }
+
+ static void BroadcastPoisonMessage(const tmb::client_id sender_id, tmb::MessageBus *bus) {
+ // Terminate all threads.
+ // The sender thread broadcasts poison message to the workers and foreman.
+ // Each worker dies after receiving poison message. The order of workers'
+ // death is irrelevant.
+ MessageStyle style;
+ style.Broadcast(true);
+ Address address;
+ address.All(true);
+ std::unique_ptr<WorkerMessage> poison_message(WorkerMessage::PoisonMessage());
+ TaggedMessage poison_tagged_message(poison_message.get(),
+ sizeof(*poison_message),
+ kPoisonMessage);
+
+ const tmb::MessageBus::SendStatus send_status = bus->Send(
+ sender_id, address, style, std::move(poison_tagged_message));
+ CHECK(send_status == tmb::MessageBus::SendStatus::kOK) <<
+ "Broadcast poison message from sender with TMB client ID " << sender_id
+ << " failed";
+ }
+
private:
/**
* @brief Constructor. Made private to avoid instantiation.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/QueryManager.hpp
----------------------------------------------------------------------
diff --git a/query_execution/QueryManager.hpp b/query_execution/QueryManager.hpp
index 47f54c5..b52460f 100644
--- a/query_execution/QueryManager.hpp
+++ b/query_execution/QueryManager.hpp
@@ -25,18 +25,21 @@
#include "catalog/CatalogTypedefs.hpp"
#include "query_execution/QueryContext.hpp"
#include "query_execution/QueryExecutionState.hpp"
+#include "query_execution/QueryExecutionTypedefs.hpp"
#include "query_execution/WorkOrdersContainer.hpp"
#include "relational_operators/RelationalOperator.hpp"
+#include "relational_operators/WorkOrder.hpp"
+#include "storage/StorageBlockInfo.hpp"
#include "utility/DAG.hpp"
#include "utility/Macros.hpp"
+#include "tmb/id_typedefs.h"
#include "tmb/message_bus.h"
#include "tmb/tagged_message.h"
namespace quickstep {
class CatalogDatabaseLite;
-class ForemanMessage;
class QueryHandle;
class StorageManager;
class WorkerMessage;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/WorkOrdersContainer.hpp
----------------------------------------------------------------------
diff --git a/query_execution/WorkOrdersContainer.hpp b/query_execution/WorkOrdersContainer.hpp
index eb9aedd..6ed6474 100644
--- a/query_execution/WorkOrdersContainer.hpp
+++ b/query_execution/WorkOrdersContainer.hpp
@@ -28,6 +28,8 @@
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
+#include "glog/logging.h"
+
namespace quickstep {
/** \addtogroup QueryExecution
@@ -76,7 +78,7 @@ class WorkOrdersContainer {
* @return If there are pending WorkOrders.
**/
inline bool hasNormalWorkOrder(const std::size_t operator_index) const {
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(operator_index < num_operators_);
return normal_workorders_[operator_index].hasWorkOrder();
}
@@ -92,9 +94,9 @@ class WorkOrdersContainer {
**/
inline bool hasNormalWorkOrderForNUMANode(
const std::size_t operator_index, const int numa_node_id) const {
- DEBUG_ASSERT(operator_index < num_operators_);
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK(operator_index < num_operators_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return normal_workorders_[operator_index].hasWorkOrderForNUMANode(
numa_node_id);
}
@@ -108,7 +110,7 @@ class WorkOrdersContainer {
* @return If there are pending rebuild WorkOrders.
**/
inline bool hasRebuildWorkOrder(const std::size_t operator_index) const {
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(operator_index < num_operators_);
return rebuild_workorders_[operator_index].hasWorkOrder();
}
@@ -124,9 +126,9 @@ class WorkOrdersContainer {
**/
inline bool hasRebuildWorkOrderForNUMANode(
const std::size_t operator_index, const int numa_node_id) const {
- DEBUG_ASSERT(operator_index < num_operators_);
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK(operator_index < num_operators_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return rebuild_workorders_[operator_index].hasWorkOrderForNUMANode(
numa_node_id);
}
@@ -144,9 +146,9 @@ class WorkOrdersContainer {
**/
WorkOrder* getNormalWorkOrderForNUMANode(const std::size_t operator_index,
const int numa_node_id) {
- DEBUG_ASSERT(operator_index < num_operators_);
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK(operator_index < num_operators_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return normal_workorders_[operator_index].getWorkOrderForNUMANode(
numa_node_id);
}
@@ -164,7 +166,7 @@ class WorkOrdersContainer {
**/
WorkOrder* getNormalWorkOrder(const std::size_t operator_index,
const bool prefer_single_NUMA_node = true) {
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(operator_index < num_operators_);
return normal_workorders_[operator_index].getWorkOrder(
prefer_single_NUMA_node);
}
@@ -182,9 +184,9 @@ class WorkOrdersContainer {
**/
WorkOrder* getRebuildWorkOrderForNUMANode(const std::size_t operator_index,
const int numa_node_id) {
- DEBUG_ASSERT(operator_index < num_operators_);
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK(operator_index < num_operators_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return rebuild_workorders_[operator_index].getWorkOrderForNUMANode(
numa_node_id);
}
@@ -202,7 +204,7 @@ class WorkOrdersContainer {
**/
WorkOrder* getRebuildWorkOrder(const std::size_t operator_index,
const bool prefer_single_NUMA_node = true) {
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(operator_index < num_operators_);
return rebuild_workorders_[operator_index].getWorkOrder(
prefer_single_NUMA_node);
}
@@ -220,8 +222,8 @@ class WorkOrdersContainer {
* @param operator_index The index of the operator in the query DAG.
**/
void addNormalWorkOrder(WorkOrder *workorder, const std::size_t operator_index) {
- DEBUG_ASSERT(workorder != nullptr);
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(workorder != nullptr);
+ DCHECK(operator_index < num_operators_);
normal_workorders_[operator_index].addWorkOrder(workorder);
}
@@ -238,8 +240,8 @@ class WorkOrdersContainer {
**/
void addRebuildWorkOrder(WorkOrder *workorder,
const std::size_t operator_index) {
- DEBUG_ASSERT(workorder != nullptr);
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(workorder != nullptr);
+ DCHECK(operator_index < num_operators_);
rebuild_workorders_[operator_index].addWorkOrder(workorder);
}
@@ -254,9 +256,9 @@ class WorkOrdersContainer {
**/
inline std::size_t getNumNormalWorkOrdersForNUMANode(
const std::size_t operator_index, const int numa_node_id) const {
- DEBUG_ASSERT(operator_index < num_operators_);
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK(operator_index < num_operators_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return normal_workorders_[operator_index].getNumWorkOrdersForNUMANode(
numa_node_id);
}
@@ -271,7 +273,7 @@ class WorkOrdersContainer {
**/
inline std::size_t getNumNormalWorkOrders(
const std::size_t operator_index) const {
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(operator_index < num_operators_);
return normal_workorders_[operator_index].getNumWorkOrders();
}
@@ -286,9 +288,9 @@ class WorkOrdersContainer {
**/
inline std::size_t getNumRebuildWorkOrdersForNUMANode(
const std::size_t operator_index, const int numa_node_id) const {
- DEBUG_ASSERT(operator_index < num_operators_);
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK(operator_index < num_operators_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return rebuild_workorders_[operator_index].getNumWorkOrdersForNUMANode(
numa_node_id);
}
@@ -303,7 +305,7 @@ class WorkOrdersContainer {
**/
inline std::size_t getNumRebuildWorkOrders(
const std::size_t operator_index) const {
- DEBUG_ASSERT(operator_index < num_operators_);
+ DCHECK(operator_index < num_operators_);
return rebuild_workorders_[operator_index].getNumWorkOrders();
}
@@ -418,8 +420,8 @@ class WorkOrdersContainer {
void addWorkOrder(WorkOrder *workorder);
bool hasWorkOrderForNUMANode(const int numa_node_id) const {
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return single_numa_node_workorders_[numa_node_id].hasWorkOrder() ||
multiple_numa_nodes_workorders_.hasWorkOrderForNUMANode(
numa_node_id);
@@ -440,8 +442,8 @@ class WorkOrdersContainer {
std::size_t getNumWorkOrdersForNUMANode(
const int numa_node_id) const {
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
return single_numa_node_workorders_[numa_node_id].getNumWorkOrders() +
multiple_numa_nodes_workorders_.getNumWorkOrdersForNUMANode(
numa_node_id);
@@ -463,8 +465,8 @@ class WorkOrdersContainer {
}
WorkOrder* getWorkOrderForNUMANode(const int numa_node_id) {
- DEBUG_ASSERT(numa_node_id >= 0);
- DEBUG_ASSERT(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
+ DCHECK_GE(numa_node_id, 0);
+ DCHECK(static_cast<std::size_t>(numa_node_id) < num_numa_nodes_);
WorkOrder *work_order = single_numa_node_workorders_[numa_node_id].getWorkOrder();
if (work_order == nullptr) {
work_order = multiple_numa_nodes_workorders_.getWorkOrderForNUMANode(
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/Worker.cpp
----------------------------------------------------------------------
diff --git a/query_execution/Worker.cpp b/query_execution/Worker.cpp
index 645fd05..ef596e1 100644
--- a/query_execution/Worker.cpp
+++ b/query_execution/Worker.cpp
@@ -58,11 +58,14 @@ void Worker::run() {
WorkerMessage message(*static_cast<const WorkerMessage*>(tagged_message.message()));
DCHECK(message.getWorkOrder() != nullptr);
message.getWorkOrder()->execute();
+ const std::size_t query_id_for_workorder =
+ message.getWorkOrder()->getQueryID();
delete message.getWorkOrder();
- sendWorkOrderCompleteMessage(annotated_msg.sender,
- message.getRelationalOpIndex(),
- tagged_message.message_type() == kRebuildWorkOrderMessage);
+ sendWorkOrderCompleteMessage(
+ annotated_msg.sender, message.getRelationalOpIndex(),
+ query_id_for_workorder,
+ tagged_message.message_type() == kRebuildWorkOrderMessage);
break;
}
case kPoisonMessage: {
@@ -76,10 +79,12 @@ void Worker::run() {
void Worker::sendWorkOrderCompleteMessage(const tmb::client_id receiver,
const size_t op_index,
+ const size_t query_id,
const bool is_rebuild_work_order) {
serialization::WorkOrderCompletionMessage proto;
proto.set_operator_index(op_index);
proto.set_worker_thread_index(worker_thread_index_);
+ proto.set_query_id(query_id);
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const size_t proto_length = proto.ByteSize();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/Worker.hpp
----------------------------------------------------------------------
diff --git a/query_execution/Worker.hpp b/query_execution/Worker.hpp
index b94e937..c0bafdc 100644
--- a/query_execution/Worker.hpp
+++ b/query_execution/Worker.hpp
@@ -97,11 +97,13 @@ class Worker : public Thread {
*
* @param receiver The id of the TMB client which should receive the response.
* @param op_index The index of the operator to which the WorkOrder belongs.
+ * @param query_id The ID of the query which the WorkOrder belongs to.
* @param is_rebuild_work_order True if it is a RebuildWorkOrder. Otherwise
* false.
**/
void sendWorkOrderCompleteMessage(const tmb::client_id receiver,
const std::size_t op_index,
+ const std::size_t query_id,
const bool is_rebuild_work_order);
const std::size_t worker_thread_index_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/WorkerMessage.hpp
----------------------------------------------------------------------
diff --git a/query_execution/WorkerMessage.hpp b/query_execution/WorkerMessage.hpp
index ec63af9..7adf2d3 100644
--- a/query_execution/WorkerMessage.hpp
+++ b/query_execution/WorkerMessage.hpp
@@ -105,6 +105,23 @@ class WorkerMessage {
return type_;
}
+ /**
+ * @brief Set a hint for the recipient worker thread.
+ *
+ * @param recipient_index_hint The hint i.e. the worker thread index.
+ **/
+ inline void setRecipientHint(const int recipient_index_hint) {
+ recipient_index_hint_ = recipient_index_hint;
+ }
+
+ /**
+ * @brief Get the hint for the recipient worker thread. The hint is invalid if
+ * it is -1.
+ **/
+ inline int getRecipientHint() const {
+ return recipient_index_hint_;
+ }
+
private:
/**
* @brief Constructor.
@@ -120,12 +137,13 @@ class WorkerMessage {
const WorkerMessageType type)
: work_unit_(work_unit),
relational_op_index_(relational_op_index),
- type_(type) {
- }
+ type_(type),
+ recipient_index_hint_(-1) {}
WorkOrder *work_unit_;
const std::size_t relational_op_index_;
const WorkerMessageType type_;
+ int recipient_index_hint_;
};
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/tests/QueryManager_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_execution/tests/QueryManager_unittest.cpp b/query_execution/tests/QueryManager_unittest.cpp
index 308d5ca..62642ac 100644
--- a/query_execution/tests/QueryManager_unittest.cpp
+++ b/query_execution/tests/QueryManager_unittest.cpp
@@ -250,6 +250,7 @@ class QueryManagerTest : public ::testing::Test {
proto.set_block_id(0); // dummy block ID
proto.set_relation_id(0); // dummy relation ID.
+ proto.set_query_id(0); // dummy query ID.
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const std::size_t proto_length = proto.ByteSize();
@@ -270,6 +271,7 @@ class QueryManagerTest : public ::testing::Test {
serialization::WorkOrderCompletionMessage proto;
proto.set_operator_index(index);
proto.set_worker_thread_index(1); // dummy worker ID.
+ proto.set_query_id(0); // dummy query ID.
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const size_t proto_length = proto.ByteSize();
@@ -291,6 +293,7 @@ class QueryManagerTest : public ::testing::Test {
serialization::WorkOrderCompletionMessage proto;
proto.set_operator_index(index);
proto.set_worker_thread_index(1); // dummy worker thread ID.
+ proto.set_query_id(0); // dummy query ID.
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const size_t proto_length = proto.ByteSize();
@@ -314,6 +317,7 @@ class QueryManagerTest : public ::testing::Test {
proto.set_block_id(0); // dummy block ID
proto.set_relation_id(0); // dummy relation ID.
+ proto.set_query_id(0); // dummy query ID.
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const std::size_t proto_length = proto.ByteSize();
@@ -712,11 +716,13 @@ TEST_F(QueryManagerTest, TwoNodesDAGPartiallyFilledBlocksTest) {
// Setup the InsertDestination proto in the query context proto.
serialization::QueryContext *query_context_proto =
query_handle_->getQueryContextProtoMutable();
+ query_context_proto->set_query_id(0); // dummy query ID.
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto->insert_destinations_size();
serialization::InsertDestination *insert_destination_proto =
query_context_proto->add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto->query_id());
insert_destination_proto->set_insert_destination_type(
serialization::InsertDestinationType::BLOCK_POOL);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_execution/tests/WorkOrdersContainer_unittest.cpp
----------------------------------------------------------------------
diff --git a/query_execution/tests/WorkOrdersContainer_unittest.cpp b/query_execution/tests/WorkOrdersContainer_unittest.cpp
index cf133c4..cb583ab 100644
--- a/query_execution/tests/WorkOrdersContainer_unittest.cpp
+++ b/query_execution/tests/WorkOrdersContainer_unittest.cpp
@@ -72,6 +72,7 @@ TEST(WorkOrdersContainerTest, ZeroNUMANodesAddWorkOrderTest) {
// they get inserted and retrieved correctly.
std::vector<int> numa_node_ids;
// A container for one operator and no NUMA nodes.
+ const std::size_t query_id = 0;
WorkOrdersContainer w(1, 0);
EXPECT_EQ(0u, w.getNumNormalWorkOrders(0));
@@ -104,11 +105,15 @@ TEST(WorkOrdersContainerTest, ZeroNUMANodesAddWorkOrderTest) {
ASSERT_TRUE(returned_work_order != nullptr);
EXPECT_EQ(work_order.getID(), static_cast<MockNUMAWorkOrder*>(returned_work_order)->getID());
+ EXPECT_EQ(query_id, returned_work_order->getQueryID());
+
WorkOrder *returned_rebuild_work_order = w.getRebuildWorkOrder(0);
ASSERT_TRUE(returned_rebuild_work_order != nullptr);
EXPECT_EQ(work_order1.getID(),
static_cast<MockNUMAWorkOrder *>(returned_rebuild_work_order)->getID());
+ EXPECT_EQ(query_id, returned_rebuild_work_order->getQueryID());
+
// Container should be empty now.
EXPECT_EQ(0u, w.getNumNormalWorkOrders(0));
EXPECT_EQ(0u, w.getNumRebuildWorkOrders(0));
@@ -123,6 +128,7 @@ TEST(WorkOrdersContainerTest, ZeroNUMANodesMultipleWorkOrdersTest) {
// if they get inserted and retrieved correctly and the order of retrieval.
// A container for one operator and no NUMA nodes.
std::vector<int> numa_node_ids;
+ const std::size_t query_id = 0;
WorkOrdersContainer w(1, 0);
EXPECT_EQ(0u, w.getNumNormalWorkOrders(0));
@@ -164,6 +170,8 @@ TEST(WorkOrdersContainerTest, ZeroNUMANodesMultipleWorkOrdersTest) {
ASSERT_TRUE(returned_work_order != nullptr);
EXPECT_EQ(static_cast<int>(kNumWorkOrders + i),
static_cast<MockNUMAWorkOrder *>(returned_rebuild_work_order)->getID());
+ EXPECT_EQ(query_id, returned_work_order->getQueryID());
+ EXPECT_EQ(query_id, returned_rebuild_work_order->getQueryID());
}
// Container should be empty now.
@@ -190,6 +198,7 @@ TEST(WorkOrdersContainerTest, MultipleNUMANodesTest) {
const std::size_t kNUMANodesUsed = numa_node_ids.size();
// A container for one operator and kNUMANodes.
+ const std::size_t query_id = 0;
WorkOrdersContainer w(1, kNUMANodes);
for (std::size_t i = 0; i < kNUMANodesUsed; ++i) {
@@ -246,6 +255,9 @@ TEST(WorkOrdersContainerTest, MultipleNUMANodesTest) {
ASSERT_TRUE(returned_rebuild_work_order != nullptr);
EXPECT_EQ(rebuild_workorders[i].getID(),
static_cast<MockNUMAWorkOrder *>(returned_rebuild_work_order)->getID());
+
+ EXPECT_EQ(query_id, returned_work_order->getQueryID());
+ EXPECT_EQ(query_id, returned_rebuild_work_order->getQueryID());
}
// No workorder should be left for this operator on any NUMA node.
@@ -291,6 +303,7 @@ TEST(WorkOrdersContainerTest, AllTypesWorkOrdersTest) {
const std::size_t kNUMANodesUsed = numa_nodes.size();
// Create the container.
+ const std::size_t query_id = 0;
WorkOrdersContainer w(1, kNUMANodes);
w.addNormalWorkOrder(&multiple_numa_work_order, 0);
@@ -331,6 +344,7 @@ TEST(WorkOrdersContainerTest, AllTypesWorkOrdersTest) {
w.getNormalWorkOrderForNUMANode(0, numa_nodes[0]));
ASSERT_TRUE(observed_work_order != nullptr);
+ EXPECT_EQ(query_id, observed_work_order->getQueryID());
EXPECT_EQ(one_numa_work_order.getPreferredNUMANodes().front(),
observed_work_order->getPreferredNUMANodes().front());
EXPECT_EQ(one_numa_work_order.getID(), observed_work_order->getID());
@@ -348,6 +362,7 @@ TEST(WorkOrdersContainerTest, AllTypesWorkOrdersTest) {
EXPECT_EQ(no_numa_work_order.getID(),
static_cast<MockNUMAWorkOrder *>(observed_non_numa_work_order)->getID());
+ EXPECT_EQ(query_id, observed_non_numa_work_order->getQueryID());
EXPECT_EQ(1u, w.getNumNormalWorkOrdersForNUMANode(0, numa_nodes[0]));
EXPECT_EQ(1u, w.getNumNormalWorkOrdersForNUMANode(0, numa_nodes[1]));
@@ -361,6 +376,7 @@ TEST(WorkOrdersContainerTest, AllTypesWorkOrdersTest) {
ASSERT_TRUE(observed_work_order_multiple_numa_nodes != nullptr);
EXPECT_EQ(multiple_numa_work_order.getID(), observed_work_order_multiple_numa_nodes->getID());
+ EXPECT_EQ(query_id, observed_work_order_multiple_numa_nodes->getQueryID());
std::vector<int> observed_numa_nodes(
observed_work_order_multiple_numa_nodes->getPreferredNUMANodes());
// Look up the expected numa nodes in the observed_numa_nodes vector.
@@ -427,6 +443,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsNormalWorkOrderTest) {
const std::size_t kNUMANodes = numa_node_ids.size();
// Create the container.
+ const std::size_t query_id = 0;
WorkOrdersContainer w(kNumOperators, kNUMANodes);
std::vector<std::size_t> operator_ids;
@@ -538,6 +555,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsNormalWorkOrderTest) {
curr_operator_id, single_numa_node_id));
ASSERT_TRUE(observed_work_order_single_numa != nullptr);
+ EXPECT_EQ(query_id, observed_work_order_single_numa->getQueryID());
// Verify if the workorder ID is correct.
const int expected_workorder_id_single_numa =
normal_workorders_one_numa_ids[curr_operator_id];
@@ -550,6 +568,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsNormalWorkOrderTest) {
curr_operator_id, multiple_numa_node_id));
ASSERT_TRUE(observed_work_order_multiple_numa != nullptr);
+ EXPECT_EQ(query_id, observed_work_order_multiple_numa->getQueryID());
// Verify if the workorder ID is correct.
const int expected_workorder_id_multiple_numa =
normal_workorders_multiple_numa_ids[curr_operator_id];
@@ -562,6 +581,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsNormalWorkOrderTest) {
static_cast<MockNUMAWorkOrder *>(w.getNormalWorkOrder(curr_operator_id));
ASSERT_TRUE(observed_work_order_no_numa != nullptr);
+ EXPECT_EQ(query_id, observed_work_order_no_numa->getQueryID());
// Verify if the workorder ID is correct.
const int expected_workorder_id_no_numa =
normal_workorders_no_numa_ids[curr_operator_id];
@@ -620,6 +640,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsRebuildWorkOrderTest) {
const std::size_t kNUMANodes = numa_node_ids.size();
// Create the container.
+ const std::size_t query_id = 0;
WorkOrdersContainer w(kNumOperators, kNUMANodes);
std::vector<std::size_t> operator_ids;
@@ -732,6 +753,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsRebuildWorkOrderTest) {
curr_operator_id, single_numa_node_id));
ASSERT_TRUE(observed_work_order_single_numa != nullptr);
+ EXPECT_EQ(query_id, observed_work_order_single_numa->getQueryID());
// Verify if the workorder ID is correct.
const int expected_workorder_id_single_numa =
rebuild_workorders_one_numa_ids[curr_operator_id];
@@ -744,6 +766,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsRebuildWorkOrderTest) {
curr_operator_id, multiple_numa_node_id));
ASSERT_TRUE(observed_work_order_multiple_numa != nullptr);
+ EXPECT_EQ(query_id, observed_work_order_multiple_numa->getQueryID());
// Verify if the workorder ID is correct.
const int expected_workorder_id_multiple_numa =
rebuild_workorders_multiple_numa_ids[curr_operator_id];
@@ -755,6 +778,7 @@ TEST(WorkOrdersContainerTest, MultipleOperatorsRebuildWorkOrderTest) {
MockNUMAWorkOrder *observed_work_order_no_numa =
static_cast<MockNUMAWorkOrder *>(w.getRebuildWorkOrder(curr_operator_id));
+ EXPECT_EQ(query_id, observed_work_order_no_numa->getQueryID());
// Verify if the workorder ID is correct.
const int expected_workorder_id_no_numa =
rebuild_workorders_no_numa_ids[curr_operator_id];
@@ -772,6 +796,7 @@ TEST(WorkOrdersContainerTest, RetrievalOrderTest) {
numa_node_ids.push_back(0);
const std::size_t kNumWorkOrdersPerType = 100;
+ const std::size_t query_id = 0;
WorkOrdersContainer w(1, 2);
std::vector<int> single_numa_node_workorder_ids;
@@ -820,6 +845,7 @@ TEST(WorkOrdersContainerTest, RetrievalOrderTest) {
MockNUMAWorkOrder *observed_work_order = static_cast<MockNUMAWorkOrder *>(
w.getNormalWorkOrder(0, prefer_single_NUMA_node));
ASSERT_TRUE(observed_work_order != nullptr);
+ EXPECT_EQ(query_id, observed_work_order->getQueryID());
if (prefer_single_NUMA_node) {
EXPECT_EQ(*single_numa_it, observed_work_order->getID());
EXPECT_EQ(1u, observed_work_order->getPreferredNUMANodes().size());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index f9fd742..075d270 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -401,7 +401,7 @@ void ExecutionGenerator::convertSample(const P::SamplePtr &physical_sample) {
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
S::InsertDestination *insert_destination_proto =
- query_context_proto_->add_insert_destinations();
+ addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_sample,
&output_relation,
insert_destination_proto);
@@ -513,7 +513,7 @@ void ExecutionGenerator::convertSelection(
const CatalogRelation *output_relation = nullptr;
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *insert_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *insert_destination_proto = addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_selection,
&output_relation,
insert_destination_proto);
@@ -753,7 +753,7 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) {
const CatalogRelation *output_relation = nullptr;
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *insert_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *insert_destination_proto = addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_plan,
&output_relation,
insert_destination_proto);
@@ -879,7 +879,7 @@ void ExecutionGenerator::convertNestedLoopsJoin(
const CatalogRelation *output_relation = nullptr;
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *insert_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *insert_destination_proto = addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_plan,
&output_relation,
insert_destination_proto);
@@ -926,7 +926,7 @@ void ExecutionGenerator::convertCopyFrom(
// Create InsertDestination proto.
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *insert_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *insert_destination_proto = addNewInsertDestinationToQueryContext();
insert_destination_proto->set_insert_destination_type(S::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(output_relation->getID());
@@ -1152,7 +1152,7 @@ void ExecutionGenerator::convertInsertTuple(
// Create InsertDestination proto.
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *insert_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *insert_destination_proto = addNewInsertDestinationToQueryContext();
insert_destination_proto->set_insert_destination_type(S::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(input_relation.getID());
@@ -1209,7 +1209,7 @@ void ExecutionGenerator::convertInsertSelection(
// Create InsertDestination proto.
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *insert_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *insert_destination_proto = addNewInsertDestinationToQueryContext();
insert_destination_proto->set_insert_destination_type(S::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(destination_relation.getID());
@@ -1280,7 +1280,7 @@ void ExecutionGenerator::convertUpdateTable(
// Create InsertDestination proto.
const QueryContext::insert_destination_id relocation_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *relocation_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *relocation_destination_proto = addNewInsertDestinationToQueryContext();
relocation_destination_proto->set_insert_destination_type(S::InsertDestinationType::BLOCK_POOL);
relocation_destination_proto->set_relation_id(input_rel_id);
@@ -1446,7 +1446,7 @@ void ExecutionGenerator::convertAggregate(
const CatalogRelation *output_relation = nullptr;
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
- S::InsertDestination *insert_destination_proto = query_context_proto_->add_insert_destinations();
+ S::InsertDestination *insert_destination_proto = addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_plan,
&output_relation,
insert_destination_proto);
@@ -1496,7 +1496,7 @@ void ExecutionGenerator::convertSort(const P::SortPtr &physical_sort) {
const QueryContext::insert_destination_id initial_runs_destination_id =
query_context_proto_->insert_destinations_size();
S::InsertDestination *initial_runs_destination_proto =
- query_context_proto_->add_insert_destinations();
+ addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(
physical_sort, &initial_runs_relation, initial_runs_destination_proto);
@@ -1542,7 +1542,7 @@ void ExecutionGenerator::convertSort(const P::SortPtr &physical_sort) {
const QueryContext::insert_destination_id merged_runs_destination_id =
query_context_proto_->insert_destinations_size();
S::InsertDestination *merged_runs_destination_proto =
- query_context_proto_->add_insert_destinations();
+ addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_sort,
&merged_runs_relation,
merged_runs_destination_proto);
@@ -1550,7 +1550,7 @@ void ExecutionGenerator::convertSort(const P::SortPtr &physical_sort) {
const QueryContext::insert_destination_id sorted_output_destination_id =
query_context_proto_->insert_destinations_size();
S::InsertDestination *sorted_output_destination_proto =
- query_context_proto_->add_insert_destinations();
+ addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_sort,
&sorted_relation,
sorted_output_destination_proto);
@@ -1606,7 +1606,7 @@ void ExecutionGenerator::convertTableGenerator(
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto_->insert_destinations_size();
S::InsertDestination *insert_destination_proto =
- query_context_proto_->add_insert_destinations();
+ addNewInsertDestinationToQueryContext();
createTemporaryCatalogRelation(physical_tablegen,
&output_relation,
insert_destination_proto);
@@ -1635,5 +1635,11 @@ void ExecutionGenerator::convertTableGenerator(
temporary_relation_info_vec_.emplace_back(tablegen_index, output_relation);
}
+S::InsertDestination* ExecutionGenerator::addNewInsertDestinationToQueryContext() {
+ S::InsertDestination *insert_destination_proto(query_context_proto_->add_insert_destinations());
+ insert_destination_proto->set_query_id(query_context_proto_->query_id());
+ return insert_destination_proto;
+}
+
} // namespace optimizer
} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp
index 0630bca..c453f7a 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -105,6 +105,7 @@ class ExecutionGenerator {
execution_plan_(DCHECK_NOTNULL(query_handle->getQueryPlanMutable())),
query_context_proto_(DCHECK_NOTNULL(query_handle->getQueryContextProtoMutable())),
execution_heuristics_(new ExecutionHeuristics()) {
+ query_context_proto_->set_query_id(query_handle_->query_id());
#ifdef QUICKSTEP_DISTRIBUTED
catalog_database_cache_proto_ = DCHECK_NOTNULL(query_handle->getCatalogDatabaseCacheProtoMutable());
#endif
@@ -374,6 +375,14 @@ class ExecutionGenerator {
*/
void dropAllTemporaryRelations();
+ /**
+ * @brief Add a new InsertDestination to the QueryContext and set the
+ * query ID for the InsertDestination.
+ *
+ * @return A pointer to the serialized InsertDestination.
+ **/
+ serialization::InsertDestination* addNewInsertDestinationToQueryContext();
+
OptimizerContext *optimizer_context_;
QueryHandle *query_handle_;
QueryPlan *execution_plan_; // A part of QueryHandle.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_optimizer/tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/CMakeLists.txt b/query_optimizer/tests/CMakeLists.txt
index 6ef2a03..5b58f75 100644
--- a/query_optimizer/tests/CMakeLists.txt
+++ b/query_optimizer/tests/CMakeLists.txt
@@ -116,9 +116,11 @@ target_link_libraries(quickstep_queryoptimizer_tests_ExecutionGeneratorTest
quickstep_cli_PrintToScreen
quickstep_parser_ParseStatement
quickstep_parser_SqlParserWrapper
+ quickstep_queryexecution_AdmitRequestMessage
quickstep_queryexecution_Foreman
quickstep_queryexecution_QueryContext
quickstep_queryexecution_QueryExecutionTypedefs
+ quickstep_queryexecution_QueryExecutionUtil
quickstep_queryexecution_Worker
quickstep_queryexecution_WorkerDirectory
quickstep_queryexecution_WorkerMessage
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp b/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
index 56b53ba..930087a 100644
--- a/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
+++ b/query_optimizer/tests/ExecutionGeneratorTestRunner.cpp
@@ -24,7 +24,9 @@
#include "cli/DropRelation.hpp"
#include "cli/PrintToScreen.hpp"
#include "parser/ParseStatement.hpp"
+#include "query_execution/AdmitRequestMessage.hpp"
#include "query_execution/Foreman.hpp"
+#include "query_execution/QueryExecutionUtil.hpp"
#include "query_execution/Worker.hpp"
#include "query_optimizer/ExecutionGenerator.hpp"
#include "query_optimizer/LogicalGenerator.hpp"
@@ -40,6 +42,8 @@
#include "glog/logging.h"
+#include "tmb/tagged_message.h"
+
namespace quickstep {
class CatalogRelation;
@@ -90,13 +94,20 @@ void ExecutionGeneratorTestRunner::runTestCase(
physical_generator.generatePlan(
logical_generator.generatePlan(*result.parsed_statement));
execution_generator.generatePlan(physical_plan);
- foreman_->setQueryPlan(
- query_handle.getQueryPlanMutable()->getQueryPlanDAGMutable());
-
- foreman_->reconstructQueryContextFromProto(query_handle.getQueryContextProto());
- foreman_->start();
- foreman_->join();
+ AdmitRequestMessage request_message(&query_handle);
+ TaggedMessage admit_tagged_message(
+ &request_message, sizeof(request_message), kAdmitRequestMessage);
+ QueryExecutionUtil::SendTMBMessage(&bus_,
+ main_thread_client_id_,
+ foreman_->getBusClientID(),
+ std::move(admit_tagged_message));
+
+ // Receive workload completion message from Foreman.
+ const AnnotatedMessage annotated_msg =
+ bus_.Receive(main_thread_client_id_, 0, true);
+ const TaggedMessage &tagged_message = annotated_msg.tagged_message;
+ DCHECK(tagged_message.message_type() == kWorkloadCompletionMessage);
const CatalogRelation *query_result_relation = query_handle.getQueryResultRelation();
if (query_result_relation) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_optimizer/tests/ExecutionGeneratorTestRunner.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/ExecutionGeneratorTestRunner.hpp b/query_optimizer/tests/ExecutionGeneratorTestRunner.hpp
index 8352d55..9204073 100644
--- a/query_optimizer/tests/ExecutionGeneratorTestRunner.hpp
+++ b/query_optimizer/tests/ExecutionGeneratorTestRunner.hpp
@@ -61,9 +61,11 @@ class ExecutionGeneratorTestRunner : public TextBasedTestRunner {
bus_.Initialize();
- foreman_.reset(new Foreman(&bus_,
- test_database_loader_.catalog_database(),
- test_database_loader_.storage_manager()));
+ main_thread_client_id_ = bus_.Connect();
+ bus_.RegisterClientAsSender(main_thread_client_id_, kAdmitRequestMessage);
+ bus_.RegisterClientAsSender(main_thread_client_id_, kPoisonMessage);
+ bus_.RegisterClientAsReceiver(main_thread_client_id_, kWorkloadCompletionMessage);
+
worker_.reset(new Worker(0, &bus_));
std::vector<client_id> worker_client_ids;
@@ -75,27 +77,20 @@ class ExecutionGeneratorTestRunner : public TextBasedTestRunner {
workers_.reset(new WorkerDirectory(1 /* number of workers */,
worker_client_ids, numa_nodes));
- foreman_->setWorkerDirectory(workers_.get());
+ foreman_.reset(new Foreman(main_thread_client_id_,
+ workers_.get(),
+ &bus_,
+ test_database_loader_.catalog_database(),
+ test_database_loader_.storage_manager()));
+ foreman_->start();
worker_->start();
}
~ExecutionGeneratorTestRunner() {
- std::unique_ptr<WorkerMessage> poison_message(WorkerMessage::PoisonMessage());
- TaggedMessage poison_tagged_message(poison_message.get(),
- sizeof(*poison_message),
- quickstep::kPoisonMessage);
-
- Address worker_address;
- MessageStyle single_receiver_style;
-
- worker_address.AddRecipient(worker_->getBusClientID());
- bus_.Send(foreman_->getBusClientID(),
- worker_address,
- single_receiver_style,
- std::move(poison_tagged_message));
-
+ QueryExecutionUtil::BroadcastPoisonMessage(main_thread_client_id_, &bus_);
worker_->join();
+ foreman_->join();
}
void runTestCase(const std::string &input,
@@ -112,6 +107,8 @@ class ExecutionGeneratorTestRunner : public TextBasedTestRunner {
std::unique_ptr<WorkerDirectory> workers_;
+ tmb::client_id main_thread_client_id_;
+
// This map is needed for InsertDestination and some operators that send
// messages to Foreman directly. To know the reason behind the design of this
// map, see the note in InsertDestination.hpp.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/query_optimizer/tests/TestDatabaseLoader.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/TestDatabaseLoader.cpp b/query_optimizer/tests/TestDatabaseLoader.cpp
index 2de69b6..764ff2f 100644
--- a/query_optimizer/tests/TestDatabaseLoader.cpp
+++ b/query_optimizer/tests/TestDatabaseLoader.cpp
@@ -122,6 +122,7 @@ void TestDatabaseLoader::loadTestRelation() {
nullptr,
&storage_manager_,
0 /* dummy op index */,
+ 0, // dummy query ID.
scheduler_client_id_,
&bus_);
int sign = 1;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/AggregationOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/AggregationOperator.hpp b/relational_operators/AggregationOperator.hpp
index f340d4e..6a8590a 100644
--- a/relational_operators/AggregationOperator.hpp
+++ b/relational_operators/AggregationOperator.hpp
@@ -111,6 +111,8 @@ class AggregationWorkOrder : public WorkOrder {
*
* @param query_id The ID of this query.
* @param input_block_id The block id.
+ * @param query_id The ID of this query.
+ * @param input_block_id The block id.
* @param state The AggregationState to use.
**/
AggregationWorkOrder(const std::size_t query_id,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/DeleteOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/DeleteOperator.cpp b/relational_operators/DeleteOperator.cpp
index 15dc9e3..ec3bc20 100644
--- a/relational_operators/DeleteOperator.cpp
+++ b/relational_operators/DeleteOperator.cpp
@@ -96,6 +96,7 @@ void DeleteWorkOrder::execute() {
proto.set_operator_index(delete_operator_index_);
proto.set_block_id(input_block_id_);
proto.set_relation_id(input_relation_.getID());
+ proto.set_query_id(query_id_);
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const std::size_t proto_length = proto.ByteSize();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/DeleteOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DeleteOperator.hpp b/relational_operators/DeleteOperator.hpp
index c55f585..fdc9b00 100644
--- a/relational_operators/DeleteOperator.hpp
+++ b/relational_operators/DeleteOperator.hpp
@@ -162,6 +162,7 @@ class DeleteWorkOrder : public WorkOrder {
StorageManager *storage_manager_;
const std::size_t delete_operator_index_;
+
const tmb::client_id scheduler_client_id_;
MessageBus *bus_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/DestroyHashOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DestroyHashOperator.hpp b/relational_operators/DestroyHashOperator.hpp
index 7d8acb7..b7fe1ac 100644
--- a/relational_operators/DestroyHashOperator.hpp
+++ b/relational_operators/DestroyHashOperator.hpp
@@ -48,6 +48,8 @@ class DestroyHashOperator : public RelationalOperator {
*
* @param query_id The ID of the query to which this operator belongs.
* @param hash_table_index The index of the JoinHashTable in QueryContext.
+ * @param query_id The ID of the query to which this operator belongs.
+ * @param hash_table_index The index of the JoinHashTable in QueryContext.
**/
DestroyHashOperator(const std::size_t query_id,
const QueryContext::join_hash_table_id hash_table_index)
@@ -80,6 +82,8 @@ class DestroyHashWorkOrder : public WorkOrder {
*
* @param query_id The ID of the query to which this WorkOrder belongs.
* @param hash_table_index The index of the JoinHashTable in QueryContext.
+ * @param query_id The ID of the query to which this WorkOrder belongs.
+ * @param hash_table_index The index of the JoinHashTable in QueryContext.
* @param query_context The QueryContext to use.
**/
DestroyHashWorkOrder(const std::size_t query_id,
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/DropTableOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/DropTableOperator.hpp b/relational_operators/DropTableOperator.hpp
index a0a8d6e..0cdb733 100644
--- a/relational_operators/DropTableOperator.hpp
+++ b/relational_operators/DropTableOperator.hpp
@@ -57,6 +57,8 @@ class DropTableOperator : public RelationalOperator {
*
* @param query_id The ID of the query to which this operator belongs.
* @param relation The relation to drop.
+ * @param query_id The ID of the query to which this operator belongs.
+ * @param relation The relation to drop.
* @param database The databse where to drop \c relation.
* @param only_drop_blocks If true, only drop the blocks belonging to \c
* relation, but leave \c relation in \c database.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/HashJoinOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index 1d5d4e3..d2d3e74 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -631,6 +631,7 @@ class HashOuterJoinWorkOrder : public WorkOrder {
* is using attributes from the build relation as input. Note that the
* length of this vector should equal the length of \p selection.
* @param lookup_block_id The block id of the probe_relation.
+ * @param query_id The ID of the query to which this WorkOrder belongs.
* @param output_destination The InsertDestination to insert the join results.
* @param storage_manager The StorageManager to use.
**/
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/RebuildWorkOrder.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/RebuildWorkOrder.hpp b/relational_operators/RebuildWorkOrder.hpp
index 86f8eaf..3125447 100644
--- a/relational_operators/RebuildWorkOrder.hpp
+++ b/relational_operators/RebuildWorkOrder.hpp
@@ -85,6 +85,7 @@ class RebuildWorkOrder : public WorkOrder {
proto.set_operator_index(input_operator_index_);
proto.set_block_id(block_ref_->getID());
proto.set_relation_id(input_relation_id_);
+ proto.set_query_id(query_id_);
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const std::size_t proto_length = proto.ByteSize();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/SortMergeRunOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/SortMergeRunOperator.cpp b/relational_operators/SortMergeRunOperator.cpp
index 6bf5719..40fde13 100644
--- a/relational_operators/SortMergeRunOperator.cpp
+++ b/relational_operators/SortMergeRunOperator.cpp
@@ -259,6 +259,7 @@ void SortMergeRunWorkOrder::execute() {
// Send completion message to operator.
FeedbackMessage msg(SortMergeRunOperator::kRunOutputMessage,
+ getQueryID(),
operator_index_,
serialized_output.first,
serialized_output.second);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/TableGeneratorOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/TableGeneratorOperator.hpp b/relational_operators/TableGeneratorOperator.hpp
index 6a6af4b..c8adb9a 100644
--- a/relational_operators/TableGeneratorOperator.hpp
+++ b/relational_operators/TableGeneratorOperator.hpp
@@ -115,6 +115,8 @@ class TableGeneratorWorkOrder : public WorkOrder {
*
* @param query_id The ID of the query to which this WorkOrder belongs.
* @param generator_function The GeneratorFunctionHandle to use.
+ * @param query_id The ID of the query to which this WorkOrder belongs.
+ * @param generator_function The GeneratorFunctionHandle to use.
* @param output_destination The InsertDestination to insert the generated
* output.
**/
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/UpdateOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/UpdateOperator.cpp b/relational_operators/UpdateOperator.cpp
index 2130563..7a99000 100644
--- a/relational_operators/UpdateOperator.cpp
+++ b/relational_operators/UpdateOperator.cpp
@@ -91,6 +91,7 @@ void UpdateWorkOrder::execute() {
proto.set_operator_index(update_operator_index_);
proto.set_block_id(input_block_id_);
proto.set_relation_id(relation_.getID());
+ proto.set_query_id(query_id_);
// NOTE(zuyu): Using the heap memory to serialize proto as a c-like string.
const std::size_t proto_length = proto.ByteSize();
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/UpdateOperator.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/UpdateOperator.hpp b/relational_operators/UpdateOperator.hpp
index cebb9b5..b4f9b9d 100644
--- a/relational_operators/UpdateOperator.hpp
+++ b/relational_operators/UpdateOperator.hpp
@@ -181,6 +181,7 @@ class UpdateWorkOrder : public WorkOrder {
StorageManager *storage_manager_;
const std::size_t update_operator_index_;
+
const tmb::client_id scheduler_client_id_;
MessageBus *bus_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/WorkOrder.hpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.hpp b/relational_operators/WorkOrder.hpp
index 059865d..df195cc 100644
--- a/relational_operators/WorkOrder.hpp
+++ b/relational_operators/WorkOrder.hpp
@@ -65,20 +65,25 @@ class WorkOrder {
* relational operator.
*/
struct FeedbackMessageHeader {
+ std::size_t query_id;
std::size_t rel_op_index;
std::size_t payload_size;
FeedbackMessageType payload_type;
/**
* @brief Header constructor.
+ *
+ * @param query_id The ID of the query.
* @param relational_op_index Index of the relational operator.
* @param payload_size Size of the payload of the message.
* @param payload_type Type of payload.
*/
- FeedbackMessageHeader(const std::size_t relational_op_index,
+ FeedbackMessageHeader(const std::size_t query_id,
+ const std::size_t relational_op_index,
const std::size_t payload_size,
const FeedbackMessageType payload_type)
- : rel_op_index(relational_op_index),
+ : query_id(query_id),
+ rel_op_index(relational_op_index),
payload_size(payload_size),
payload_type(payload_type) {}
};
@@ -93,17 +98,19 @@ class WorkOrder {
* @brief Feedback message constructor.
*
* @param type Type of the message.
+ * @param query_id The ID of the query.
* @param rel_op_index Relational operator index.
* @param payload Blob of payload.
* @param payload_size Size of the payload blob.
* @param ownership Whether to take ownership of the payload blob.
*/
FeedbackMessage(const FeedbackMessageType type,
+ const std::size_t query_id,
const std::size_t rel_op_index,
void *payload,
const std::size_t payload_size,
const bool ownership = true)
- : header_(rel_op_index, payload_size, type),
+ : header_(query_id, rel_op_index, payload_size, type),
payload_(payload),
ownership_(ownership) {}
@@ -285,6 +292,13 @@ class WorkOrder {
" receiver thread with TMB client ID " << receiver_id;
}
+ /**
+ * @brief Get the ID of the query which this WorkOrder belongs to.
+ **/
+ inline const std::size_t getQueryID() const {
+ return query_id_;
+ }
+
protected:
/**
* @brief Constructor.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/tests/AggregationOperator_unittest.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/tests/AggregationOperator_unittest.cpp b/relational_operators/tests/AggregationOperator_unittest.cpp
index fdcc54f..27d974d 100644
--- a/relational_operators/tests/AggregationOperator_unittest.cpp
+++ b/relational_operators/tests/AggregationOperator_unittest.cpp
@@ -228,6 +228,8 @@ class AggregationOperatorTest : public ::testing::Test {
// Setup the aggregation state proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
+
const QueryContext::aggregation_state_id aggr_state_index = query_context_proto.aggregation_states_size();
serialization::AggregationOperationState *aggr_state_proto = query_context_proto.add_aggregation_states();
aggr_state_proto->set_relation_id(table_->getID());
@@ -277,6 +279,7 @@ class AggregationOperatorTest : public ::testing::Test {
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(result_table_->getID());
@@ -319,6 +322,8 @@ class AggregationOperatorTest : public ::testing::Test {
// Setup the aggregation state proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
+
const QueryContext::aggregation_state_id aggr_state_index = query_context_proto.aggregation_states_size();
serialization::AggregationOperationState *aggr_state_proto = query_context_proto.add_aggregation_states();
aggr_state_proto->set_relation_id(table_->getID());
@@ -362,6 +367,8 @@ class AggregationOperatorTest : public ::testing::Test {
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(result_table_->getID());
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/tests/HashJoinOperator_unittest.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/tests/HashJoinOperator_unittest.cpp b/relational_operators/tests/HashJoinOperator_unittest.cpp
index 074b603..8fee50d 100644
--- a/relational_operators/tests/HashJoinOperator_unittest.cpp
+++ b/relational_operators/tests/HashJoinOperator_unittest.cpp
@@ -294,6 +294,7 @@ class HashJoinOperatorTest : public ::testing::TestWithParam<HashTableImplType>
TEST_P(HashJoinOperatorTest, LongKeyHashJoinTest) {
// Setup the hash table proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
const QueryContext::join_hash_table_id join_hash_table_index =
query_context_proto.join_hash_tables_size();
@@ -355,6 +356,7 @@ TEST_P(HashJoinOperatorTest, LongKeyHashJoinTest) {
const QueryContext::insert_destination_id output_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(output_relation_id);
@@ -434,6 +436,7 @@ TEST_P(HashJoinOperatorTest, LongKeyHashJoinTest) {
TEST_P(HashJoinOperatorTest, IntDuplicateKeyHashJoinTest) {
// Setup the hash table proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
const QueryContext::join_hash_table_id join_hash_table_index =
query_context_proto.join_hash_tables_size();
@@ -503,6 +506,7 @@ TEST_P(HashJoinOperatorTest, IntDuplicateKeyHashJoinTest) {
const QueryContext::insert_destination_id output_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(output_relation_id);
@@ -604,6 +608,7 @@ TEST_P(HashJoinOperatorTest, IntDuplicateKeyHashJoinTest) {
TEST_P(HashJoinOperatorTest, CharKeyCartesianProductHashJoinTest) {
// Setup the hash table proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
const QueryContext::join_hash_table_id join_hash_table_index =
query_context_proto.join_hash_tables_size();
@@ -660,6 +665,7 @@ TEST_P(HashJoinOperatorTest, CharKeyCartesianProductHashJoinTest) {
const QueryContext::insert_destination_id output_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(output_relation_id);
@@ -739,6 +745,7 @@ TEST_P(HashJoinOperatorTest, CharKeyCartesianProductHashJoinTest) {
TEST_P(HashJoinOperatorTest, VarCharDuplicateKeyHashJoinTest) {
// Setup the hash table proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
const QueryContext::join_hash_table_id join_hash_table_index =
query_context_proto.join_hash_tables_size();
@@ -801,6 +808,7 @@ TEST_P(HashJoinOperatorTest, VarCharDuplicateKeyHashJoinTest) {
const QueryContext::insert_destination_id output_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(output_relation_id);
@@ -906,6 +914,7 @@ TEST_P(HashJoinOperatorTest, VarCharDuplicateKeyHashJoinTest) {
TEST_P(HashJoinOperatorTest, CompositeKeyHashJoinTest) {
// Setup the hash table proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
const QueryContext::join_hash_table_id join_hash_table_index =
query_context_proto.join_hash_tables_size();
@@ -973,6 +982,7 @@ TEST_P(HashJoinOperatorTest, CompositeKeyHashJoinTest) {
const QueryContext::insert_destination_id output_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(output_relation_id);
@@ -1083,6 +1093,7 @@ TEST_P(HashJoinOperatorTest, CompositeKeyHashJoinTest) {
TEST_P(HashJoinOperatorTest, CompositeKeyHashJoinWithResidualPredicateTest) {
// Setup the hash table proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
const QueryContext::join_hash_table_id join_hash_table_index =
query_context_proto.join_hash_tables_size();
@@ -1150,6 +1161,7 @@ TEST_P(HashJoinOperatorTest, CompositeKeyHashJoinWithResidualPredicateTest) {
const QueryContext::insert_destination_id output_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(output_relation_id);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/tests/SortMergeRunOperator_unittest.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/tests/SortMergeRunOperator_unittest.cpp b/relational_operators/tests/SortMergeRunOperator_unittest.cpp
index fc10671..46fb7ae 100644
--- a/relational_operators/tests/SortMergeRunOperator_unittest.cpp
+++ b/relational_operators/tests/SortMergeRunOperator_unittest.cpp
@@ -189,6 +189,7 @@ class RunTest : public ::testing::Test {
nullptr,
storage_manager_.get(),
kOpIndex,
+ 0, // dummy query ID.
foreman_client_id_,
&bus_));
}
@@ -433,6 +434,7 @@ class RunMergerTest : public ::testing::Test {
nullptr,
storage_manager_.get(),
kOpIndex,
+ 0, // dummy query ID.
foreman_client_id_,
&bus_));
}
@@ -1269,9 +1271,12 @@ class SortMergeRunOperatorTest : public ::testing::Test {
ASSERT_EQ(null_col3_, result_table_->getAttributeByName("null-col-3")->getID());
ASSERT_EQ(tid_col_, result_table_->getAttributeByName("tid")->getID());
+ query_context_proto_.set_query_id(0); // dummy query ID.
+
// Setup the InsertDestination proto in the query context proto.
insert_destination_index_ = query_context_proto_.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto_.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto_.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(result_table_id);
@@ -1291,6 +1296,7 @@ class SortMergeRunOperatorTest : public ::testing::Test {
run_destination_index_ = query_context_proto_.insert_destinations_size();
insert_destination_proto = query_context_proto_.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto_.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(run_table_id);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/94a2e1dc/relational_operators/tests/SortRunGenerationOperator_unittest.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/tests/SortRunGenerationOperator_unittest.cpp b/relational_operators/tests/SortRunGenerationOperator_unittest.cpp
index 71a80e4..3eeb7e9 100644
--- a/relational_operators/tests/SortRunGenerationOperator_unittest.cpp
+++ b/relational_operators/tests/SortRunGenerationOperator_unittest.cpp
@@ -328,10 +328,12 @@ class SortRunGenerationOperatorTest : public ::testing::Test {
const std::vector<bool> &null_ordering) {
// Setup the InsertDestination proto in the query context proto.
serialization::QueryContext query_context_proto;
+ query_context_proto.set_query_id(0); // dummy query ID.
const QueryContext::insert_destination_id insert_destination_index =
query_context_proto.insert_destinations_size();
serialization::InsertDestination *insert_destination_proto = query_context_proto.add_insert_destinations();
+ insert_destination_proto->set_query_id(query_context_proto.query_id());
insert_destination_proto->set_insert_destination_type(serialization::InsertDestinationType::BLOCK_POOL);
insert_destination_proto->set_relation_id(result_table_->getID());