You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/01/11 01:01:22 UTC
[30/50] incubator-quickstep git commit: Adds support for
PartialBulkInserts in StorageBlocks
Adds support for PartialBulkInserts in StorageBlocks
- Enables use of PartialBulkInserts in StorageBlocks
- Value accessor changes to allow use of 2 insert destinations
- Enables PartialInserts for SplitRow
- Changes HashJoin operator so that it can take advantage of the
PartialInserts code.
- This also cleans up code from Previous commit.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/2d11ec58
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/2d11ec58
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/2d11ec58
Branch: refs/heads/quickstep_partition_parser_support
Commit: 2d11ec588e7e8d7a7a1a8adfc28fb30e5fe0852e
Parents: 172b51b
Author: navsan <na...@gmail.com>
Authored: Mon Nov 7 14:35:01 2016 -0600
Committer: cramja <ma...@gmail.com>
Committed: Mon Nov 21 14:28:02 2016 -0600
----------------------------------------------------------------------
relational_operators/HashJoinOperator.cpp | 150 ++++++++++++++++---
storage/InsertDestination.cpp | 84 +++++++++++
storage/InsertDestination.hpp | 16 ++
storage/InsertDestinationInterface.hpp | 22 +++
storage/SplitRowStoreTupleStorageSubBlock.hpp | 4 +-
storage/StorageBlock.cpp | 24 +++
storage/StorageBlock.hpp | 44 ++++++
storage/TupleStorageSubBlock.hpp | 50 +++++++
types/containers/ColumnVectorsValueAccessor.hpp | 4 +
9 files changed, 373 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index 4a91f86..2028046 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -65,10 +65,11 @@ namespace {
// Functor passed to HashTable::getAllFromValueAccessor() to collect matching
// tuples from the inner relation. It stores matching tuple ID pairs
-// in an unordered_map keyed by inner block ID.
-class MapBasedJoinedTupleCollector {
+// in an unordered_map keyed by inner block ID, whose values are vectors of
+// (build-tuple-ID, probe-tuple-ID) pairs.
+class VectorsOfPairsJoinedTuplesCollector {
public:
- MapBasedJoinedTupleCollector() {
+ VectorsOfPairsJoinedTuplesCollector() {
}
template <typename ValueAccessorT>
@@ -95,6 +96,34 @@ class MapBasedJoinedTupleCollector {
std::unordered_map<block_id, std::vector<std::pair<tuple_id, tuple_id>>> joined_tuples_;
};
+// Another collector using an unordered_map keyed on inner block just like above,
+// except that it uses a pair of (build-tuple-IDs-vector, probe-tuple-IDs-vector).
+class PairsOfVectorsJoinedTuplesCollector {
+ public:
+ PairsOfVectorsJoinedTuplesCollector() {
+ }
+
+ template <typename ValueAccessorT>
+ inline void operator()(const ValueAccessorT &accessor,
+ const TupleReference &tref) {
+ joined_tuples_[tref.block].first.push_back(tref.tuple);
+ joined_tuples_[tref.block].second.push_back(accessor.getCurrentPosition());
+ }
+
+ // Get a mutable pointer to the collected map of joined tuple ID pairs. The
+ // key is inner block_id, value is a pair consisting of
+ // inner block tuple IDs (first) and outer block tuple IDs (second).
+ inline std::unordered_map< block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>*
+ getJoinedTuples() {
+ return &joined_tuples_;
+ }
+
+ private:
+ std::unordered_map<
+ block_id,
+ std::pair<std::vector<tuple_id>, std::vector<tuple_id>>> joined_tuples_;
+};
+
class SemiAntiJoinTupleCollector {
public:
explicit SemiAntiJoinTupleCollector(TupleIdSequence *filter)
@@ -432,7 +461,7 @@ void HashInnerJoinWorkOrder::execute() {
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
}
- MapBasedJoinedTupleCollector collector;
+ PairsOfVectorsJoinedTuplesCollector collector;
if (join_key_attributes_.size() == 1) {
hash_table_.getAllFromValueAccessor(
probe_accessor.get(),
@@ -450,12 +479,14 @@ void HashInnerJoinWorkOrder::execute() {
const relation_id build_relation_id = build_relation_.getID();
const relation_id probe_relation_id = probe_relation_.getID();
- for (std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>>
+ for (std::pair<const block_id, std::pair<std::vector<tuple_id>, std::vector<tuple_id>>>
&build_block_entry : *collector.getJoinedTuples()) {
BlockReference build_block =
storage_manager_->getBlock(build_block_entry.first, build_relation_);
const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+ const std::vector<tuple_id> &build_tids = build_block_entry.second.first;
+ const std::vector<tuple_id> &probe_tids = build_block_entry.second.second;
// Evaluate '*residual_predicate_', if any.
//
@@ -468,17 +499,16 @@ void HashInnerJoinWorkOrder::execute() {
// hash join is below a reasonable threshold so that we don't blow up
// temporary memory requirements to an unreasonable degree.
if (residual_predicate_ != nullptr) {
- std::vector<std::pair<tuple_id, tuple_id>> filtered_matches;
-
- for (const std::pair<tuple_id, tuple_id> &hash_match
- : build_block_entry.second) {
+ std::pair<std::vector<tuple_id>, std::vector<tuple_id>> filtered_matches;
+ for (std::size_t i = 0; i < build_tids.size(); ++i) {
if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
build_relation_id,
- hash_match.first,
+ build_tids[i],
*probe_accessor,
probe_relation_id,
- hash_match.second)) {
- filtered_matches.emplace_back(hash_match);
+ probe_tids[i])) {
+ filtered_matches.first.push_back(build_tids[i]);
+ filtered_matches.second.push_back(probe_tids[i]);
}
}
@@ -501,22 +531,96 @@ void HashInnerJoinWorkOrder::execute() {
// benefit (probably only a real performance win when there are very few
// matching tuples in each individual inner block but very many inner
// blocks with at least one match).
+
+ // We now create ordered value accessors for both build and probe side,
+ // using the joined tuple TIDs. Note that we have to use this Lambda-based
+ // invocation method here because the accessors don't have a virtual
+ // function that creates such an OrderedTupleIdSequenceAdapterValueAccessor.
+ std::unique_ptr<ValueAccessor> ordered_build_accessor, ordered_probe_accessor;
+ InvokeOnValueAccessorNotAdapter(
+ build_accessor.get(),
+ [&](auto *accessor) -> void { // NOLINT(build/c++11)
+ ordered_build_accessor.reset(
+ accessor->createSharedOrderedTupleIdSequenceAdapter(build_tids));
+ });
+
+ if (probe_accessor->isTupleIdSequenceAdapter()) {
+ InvokeOnTupleIdSequenceAdapterValueAccessor(
+ probe_accessor.get(),
+ [&](auto *accessor) -> void { // NOLINT(build/c++11)
+ ordered_probe_accessor.reset(
+ accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
+ });
+ } else {
+ InvokeOnValueAccessorNotAdapter(
+ probe_accessor.get(),
+ [&](auto *accessor) -> void { // NOLINT(build/c++11)
+ ordered_probe_accessor.reset(
+ accessor->createSharedOrderedTupleIdSequenceAdapter(probe_tids));
+ });
+ }
+
+
+ // We also need a temp value accessor to store results of any scalar expressions.
ColumnVectorsValueAccessor temp_result;
- for (vector<unique_ptr<const Scalar>>::const_iterator selection_cit = selection_.begin();
- selection_cit != selection_.end();
- ++selection_cit) {
- temp_result.addColumn((*selection_cit)->getAllValuesForJoin(build_relation_id,
- build_accessor.get(),
- probe_relation_id,
- probe_accessor.get(),
- build_block_entry.second));
+
+ // Create a map of ValueAccessors and what attributes we want to pick from them
+ std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map;
+ const std::vector<ValueAccessor *> accessors{
+ ordered_build_accessor.get(), ordered_probe_accessor.get(), &temp_result};
+ const unsigned int build_index = 0, probe_index = 1, temp_index = 2;
+ for (auto &accessor : accessors) {
+ accessor_attribute_map.push_back(std::make_pair(
+ accessor,
+ std::vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
+ }
+
+ attribute_id dest_attr = 0;
+ std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
+
+ for (auto &selection_cit : selection_) {
+ // If the Scalar (column) is not an attribute in build/probe blocks, then
+ // insert it into a ColumnVectorsValueAccessor.
+ if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
+ // Current destination attribute maps to the column we'll create now.
+ accessor_attribute_map[temp_index].second[dest_attr] = temp_result.getNumColumns();
+
+ if (temp_result.getNumColumns() == 0) {
+ // The getAllValuesForJoin function below needs joined tuple IDs as
+ // a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
+ // a pair of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So
+ // we'll have to zip our two vectors together. We do this inside
+ // the loop because most queries don't exercise this code since
+ // they don't have scalar expressions with attributes from both
+ // build and probe relations (other expressions would have been
+ // pushed down to before the join).
+ zipped_joined_tuple_ids.reserve(build_tids.size());
+ for (std::size_t i = 0; i < build_tids.size(); ++i) {
+ zipped_joined_tuple_ids.push_back(std::make_pair(build_tids[i], probe_tids[i]));
+ }
+ }
+ temp_result.addColumn(
+ selection_cit
+ ->getAllValuesForJoin(build_relation_id, build_accessor.get(),
+ probe_relation_id, probe_accessor.get(),
+ zipped_joined_tuple_ids));
+ } else {
+ auto scalar_attr = static_cast<const ScalarAttribute *>(selection_cit.get());
+ const attribute_id attr_id = scalar_attr->getAttribute().getID();
+ if (scalar_attr->getAttribute().getParent().getID() == build_relation_id) {
+ accessor_attribute_map[build_index].second[dest_attr] = attr_id;
+ } else {
+ accessor_attribute_map[probe_index].second[dest_attr] = attr_id;
+ }
+ }
+ ++dest_attr;
}
// NOTE(chasseur): calling the bulk-insert method of InsertDestination once
// for each pair of joined blocks incurs some extra overhead that could be
// avoided by keeping checked-out MutableBlockReferences across iterations
// of this loop, but that would get messy when combined with partitioning.
- output_destination_->bulkInsertTuples(&temp_result);
+ output_destination_->bulkInsertTuplesFromValueAccessors(accessor_attribute_map);
}
}
@@ -550,7 +654,7 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() {
// We collect all the matching probe relation tuples, as there's a residual
// preidcate that needs to be applied after collecting these matches.
- MapBasedJoinedTupleCollector collector;
+ VectorsOfPairsJoinedTuplesCollector collector;
if (join_key_attributes_.size() == 1) {
hash_table_.getAllFromValueAccessor(
probe_accessor.get(),
@@ -759,7 +863,7 @@ void HashAntiJoinWorkOrder::executeWithResidualPredicate() {
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
}
- MapBasedJoinedTupleCollector collector;
+ VectorsOfPairsJoinedTuplesCollector collector;
// We probe the hash table and get all the matches. Unlike
// executeWithoutResidualPredicate(), we have to collect all the matching
// tuples, because after this step we still have to evalute the residual
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestination.cpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp
index 5e83453..067edf6 100644
--- a/storage/InsertDestination.cpp
+++ b/storage/InsertDestination.cpp
@@ -247,6 +247,90 @@ void InsertDestination::bulkInsertTuplesWithRemappedAttributes(
});
}
+// A common case that we can optimize away is when the attribute_map
+// for an accessor only contains gaps. For example, this happens for a join when
+// there are no attributes selected from one side.
+void removeGapOnlyAccessors(
+ const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>>* accessor_attribute_map,
+ std::vector<std::pair<ValueAccessor *, const std::vector<attribute_id>>>* reduced_accessor_attribute_map) {
+ for (std::size_t i = 0; i < accessor_attribute_map->size(); ++i) {
+ bool all_gaps = true;
+ for (const auto &attr : (*accessor_attribute_map)[i].second)
+ if (attr != kInvalidCatalogId) {
+ all_gaps = false;
+ break;
+ }
+ if (all_gaps)
+ continue;
+ reduced_accessor_attribute_map->push_back((*accessor_attribute_map)[i]);
+ (*accessor_attribute_map)[i].first->beginIterationVirtual();
+ }
+}
+
+void InsertDestination::bulkInsertTuplesFromValueAccessors(
+ const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+ bool always_mark_full) {
+ // Handle pathological corner case where there are no accessors
+ if (accessor_attribute_map.size() == 0)
+ return;
+
+ std::vector<std::pair<ValueAccessor *, const std::vector<attribute_id>>> reduced_accessor_attribute_map;
+ removeGapOnlyAccessors(&accessor_attribute_map, &reduced_accessor_attribute_map);
+
+ // We assume that all input accessors have the same number of tuples, so
+ // the iterations finish together. Therefore, we can just check the first one.
+ auto first_accessor = reduced_accessor_attribute_map[0].first;
+ while (!first_accessor->iterationFinishedVirtual()) {
+ tuple_id num_tuples_to_insert = kCatalogMaxID;
+ tuple_id num_tuples_inserted = 0;
+ MutableBlockReference output_block = this->getBlockForInsertion();
+
+ // Now iterate through all the accessors and do one round of bulk-insertion
+ // of partial tuples into the selected output_block.
+ // While inserting from the first ValueAccessor, space is reserved for
+ // all the columns including those coming from other ValueAccessors.
+ // Thereafter, in a given round, we only insert the remaining columns of the
+ // same tuples from the other ValueAccessors.
+ for (auto &p : reduced_accessor_attribute_map) {
+ ValueAccessor *accessor = p.first;
+ std::vector<attribute_id> attribute_map = p.second;
+
+
+ InvokeOnAnyValueAccessor(
+ accessor,
+ [&](auto *accessor) -> void { // NOLINT(build/c++11)
+ num_tuples_inserted = output_block->bulkInsertPartialTuples(
+ attribute_map, accessor, num_tuples_to_insert);
+ });
+
+ if (accessor == first_accessor) {
+ // Now we know how many full tuples can be inserted into this
+ // output_block (viz. number of tuples inserted from first ValueAccessor).
+ // We should only insert that many tuples from the remaining
+ // ValueAccessors as well.
+ num_tuples_to_insert = num_tuples_inserted;
+ } else {
+ // Since the bulk insertion of the first ValueAccessor should already
+ // have reserved the space for all the other ValueAccessors' columns,
+ // we must have been able to insert all the tuples we asked to insert.
+ DCHECK(num_tuples_inserted == num_tuples_to_insert);
+ }
+ }
+
+ // After one round of insertions, we have successfully inserted as many
+ // tuples as possible into the output_block. Strictly speaking, it's
+ // possible that there is more space for insertions because the size
+ // estimation of variable length columns is conservative. But we will ignore
+ // that case and proceed assuming that this output_block is full.
+
+ // Update the header for output_block and then return it.
+ output_block->bulkInsertPartialTuplesFinalize(num_tuples_inserted);
+ const bool mark_full = always_mark_full
+ || !first_accessor->iterationFinishedVirtual();
+ this->returnBlock(std::move(output_block), mark_full);
+ }
+}
+
void InsertDestination::insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
std::vector<Tuple>::const_iterator end) {
if (begin == end) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestination.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp
index 408e76b..3487638 100644
--- a/storage/InsertDestination.hpp
+++ b/storage/InsertDestination.hpp
@@ -152,6 +152,10 @@ class InsertDestination : public InsertDestinationInterface {
ValueAccessor *accessor,
bool always_mark_full = false) override;
+ void bulkInsertTuplesFromValueAccessors(
+ const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+ bool always_mark_full = false) override;
+
void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
std::vector<Tuple>::const_iterator end) override;
@@ -313,6 +317,12 @@ class AlwaysCreateBlockInsertDestination : public InsertDestination {
~AlwaysCreateBlockInsertDestination() override {
}
+ void bulkInsertTuplesFromValueAccessors(
+ const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+ bool always_mark_full = false) override {
+ LOG(FATAL) << "bulkInsertTuplesFromValueAccessors is not implemented for AlwaysCreateBlockInsertDestination";
+ }
+
protected:
MutableBlockReference getBlockForInsertion() override;
@@ -517,6 +527,12 @@ class PartitionAwareInsertDestination : public InsertDestination {
ValueAccessor *accessor,
bool always_mark_full = false) override;
+ void bulkInsertTuplesFromValueAccessors(
+ const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+ bool always_mark_full = false) override {
+ LOG(FATAL) << "bulkInsertTuplesFromValueAccessors is not implemented for PartitionAwareInsertDestination";
+ }
+
void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
std::vector<Tuple>::const_iterator end) override;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/InsertDestinationInterface.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestinationInterface.hpp b/storage/InsertDestinationInterface.hpp
index 423dff1..b62d3e5 100644
--- a/storage/InsertDestinationInterface.hpp
+++ b/storage/InsertDestinationInterface.hpp
@@ -20,6 +20,7 @@
#ifndef QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_
#define QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_
+#include <utility>
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
@@ -122,6 +123,27 @@ class InsertDestinationInterface {
bool always_mark_full = false) = 0;
/**
+ * @brief Bulk-insert tuples from one or more ValueAccessors
+ * into blocks managed by this InsertDestination.
+ *
+ * @warning It is implicitly assumed that all the input ValueAccessors have
+ * the same number of tuples in them.
+ *
+ * @param accessor_attribute_map A vector of pairs of ValueAccessor and
+ * corresponding attribute map.
+ * The i-th attribute ID in the attr map for a value accessor is "n"
+ * if the attribute_id "i" in the output relation
+ * is the attribute_id "n" in the corresponding input value accessor.
+ * Set the i-th element to kInvalidCatalogId if it doesn't come from
+ * the corresponding value accessor.
+ * @param always_mark_full If \c true, always mark the blocks full after
+ * insertion from ValueAccessor even when partially full.
+ **/
+ virtual void bulkInsertTuplesFromValueAccessors(
+ const std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> &accessor_attribute_map,
+ bool always_mark_full = false) = 0;
+
+ /**
* @brief Insert tuples from a range of Tuples in a vector.
* @warning Unlike bulkInsertTuples(), this is not well-optimized and not
* intended for general use. It should only be used by
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/SplitRowStoreTupleStorageSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreTupleStorageSubBlock.hpp b/storage/SplitRowStoreTupleStorageSubBlock.hpp
index 681001e..89c756d 100644
--- a/storage/SplitRowStoreTupleStorageSubBlock.hpp
+++ b/storage/SplitRowStoreTupleStorageSubBlock.hpp
@@ -304,9 +304,9 @@ class SplitRowStoreTupleStorageSubBlock: public TupleStorageSubBlock {
tuple_id bulkInsertPartialTuples(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor,
- const tuple_id max_num_tuples_to_insert);
+ const tuple_id max_num_tuples_to_insert) override;
- void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted);
+ void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted) override;
const void* getAttributeValue(const tuple_id tuple,
const attribute_id attr) const override;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index ea74ee6..6267d6b 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -284,6 +284,30 @@ tuple_id StorageBlock::bulkInsertTuplesWithRemappedAttributes(
return num_inserted;
}
+tuple_id StorageBlock::bulkInsertPartialTuples(
+ const std::vector<attribute_id> &attribute_map,
+ ValueAccessor *accessor,
+ const tuple_id max_num_tuples_to_insert) {
+ const tuple_id num_inserted
+ = tuple_store_->bulkInsertPartialTuples(attribute_map,
+ accessor,
+ max_num_tuples_to_insert);
+ if (num_inserted != 0) {
+ invalidateAllIndexes();
+ dirty_ = true;
+ } else if (tuple_store_->isEmpty()) {
+ if (!accessor->iterationFinishedVirtual()) {
+ throw TupleTooLargeForBlock(0);
+ }
+ }
+ return num_inserted;
+}
+
+void StorageBlock::bulkInsertPartialTuplesFinalize(
+ const tuple_id num_tuples_inserted) {
+ tuple_store_->bulkInsertPartialTuplesFinalize(num_tuples_inserted);
+}
+
void StorageBlock::sample(const bool is_block_sample,
const int percentage,
InsertDestinationInterface *destination) const {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index 56b3bdc..ed252c5 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -307,6 +307,7 @@ class StorageBlock : public StorageBlockBase {
* iteration will be advanced to the first non-inserted tuple or, if
* all accessible tuples were inserted in this block, to the end
* position.
+ * @note No tuple-count limit applies here; see bulkInsertPartialTuples() for a capped insert.
* @return The number of tuples inserted from accessor.
**/
tuple_id bulkInsertTuplesWithRemappedAttributes(
@@ -314,6 +315,49 @@ class StorageBlock : public StorageBlockBase {
ValueAccessor *accessor);
/**
+ * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
+ * as a single batch, using the attribute_map to project and reorder
+ * columns from the input ValueAccessor. Does not update header.
+ *
+ * @note Typical usage is where you want to bulk-insert columns from two
+ * or more value accessors. Instead of writing out the columns into
+ * one or more column vector value accessors, you can simply use this
+ * function with the appropriate attribute_map for each value
+ * accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
+ * handles all the details) to insert tuples without an extra temp copy.
+ *
+ * @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
+ * until which point, the insertion is not visible to others.
+ * @warning The inserted tuples may be placed in sub-optimal locations in this
+ * TupleStorageSubBlock.
+ *
+ * @param attribute_map A vector which maps the attributes of this
+ * TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId)
+ * to the corresponding attributes which should be read from accessor.
+ * @param accessor A ValueAccessor to insert tuples from. The accessor's
+ * iteration will be advanced to the first non-inserted tuple or, if
+ * all accessible tuples were inserted in this sub-block, to the end
+ * position.
+ * @return The number of tuples inserted from accessor.
+ **/
+ tuple_id bulkInsertPartialTuples(
+ const std::vector<attribute_id> &attribute_map,
+ ValueAccessor *accessor,
+ const tuple_id max_num_tuples_to_insert);
+
+ /**
+ * @brief Update header after a bulkInsertPartialTuples.
+ *
+ * @warning Only call this after a bulkInsertPartialTuples, passing in the
+ * number of tuples that were inserted (return value of that function).
+ *
+ * @param num_tuples_inserted Number of tuples inserted (i.e., how much to
+ * advance the header.num_tuples by). Should be equal to the return
+ * value of bulkInsertPartialTuples.
+ **/
+ void bulkInsertPartialTuplesFinalize(tuple_id num_tuples_inserted);
+
+ /**
* @brief Get the IDs of tuples in this StorageBlock which match a given Predicate.
*
* @param predicate The predicate to match.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/storage/TupleStorageSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/TupleStorageSubBlock.hpp b/storage/TupleStorageSubBlock.hpp
index aed6eea..26e8027 100644
--- a/storage/TupleStorageSubBlock.hpp
+++ b/storage/TupleStorageSubBlock.hpp
@@ -272,6 +272,56 @@ class TupleStorageSubBlock {
ValueAccessor *accessor) = 0;
/**
+ * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
+ * as a single batch, using the attribute_map to project and reorder
+ * columns from the input ValueAccessor. Does not update header.
+ *
+ * @note Typical usage is where you want to bulk-insert columns from two
+ * or more value accessors. Instead of writing out the columns into
+ * one or more column vector value accessors, you can simply use this
+ * function with the appropriate attribute_map for each value
+ * accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
+ * handles all the details) to insert tuples without an extra temp copy.
+ *
+ * @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
+ * until which point, the insertion is not visible to others.
+ * @warning The inserted tuples may be placed in a suboptimal position in the
+ * block.
+ *
+ * @param attribute_map A vector which maps the attributes of this
+ * TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId)
+ * to the corresponding attributes which should be read from accessor.
+ * @param accessor A ValueAccessor to insert tuples from. The accessor's
+ * iteration will be advanced to the first non-inserted tuple or, if
+ * all accessible tuples were inserted in this sub-block, to the end
+ * position.
+ * @return The number of tuples inserted from accessor.
+ **/
+ virtual tuple_id bulkInsertPartialTuples(
+ const std::vector<attribute_id> &attribute_map,
+ ValueAccessor *accessor,
+ const tuple_id max_num_tuples_to_insert) {
+ LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
+ << getTupleStorageSubBlockType() << ").";
+ }
+
+ /**
+ * @brief Update header after a bulkInsertPartialTuples.
+ *
+ * @warning Only call this after a bulkInsertPartialTuples, passing in the
+ * number of tuples that were inserted (return value of that function).
+ *
+ * @param num_tuples_inserted Number of tuples inserted (i.e., how much to
+ * advance the header.num_tuples by). Should be equal to the return
+ * value of bulkInsertPartialTuples.
+ **/
+ virtual void bulkInsertPartialTuplesFinalize(
+ const tuple_id num_tuples_inserted) {
+ LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
+ << getTupleStorageSubBlockType() << ").";
+ }
+
+ /**
* @brief Get the (untyped) value of an attribute in a tuple in this buffer.
* @warning This method may not be supported for all implementations of
* TupleStorageSubBlock. supportsUntypedGetAttributeValue() MUST be
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d11ec58/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index fe413a0..fbbdc1b 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -139,6 +139,10 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
return nullptr;
}
+ inline std::size_t getNumColumns() const {
+ return columns_.size();
+ }
+
template <bool check_null = true>
inline const void* getUntypedValue(const attribute_id attr_id) const {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);