You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by na...@apache.org on 2016/09/02 23:36:07 UTC
[2/7] incubator-quickstep git commit: WIP: Add partial bulk insert
function in InsertDestination.
WIP: Add partial bulk insert function in InsertDestination.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/69cc69b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/69cc69b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/69cc69b0
Branch: refs/heads/col_vec_opt
Commit: 69cc69b098d9d58caa11c9e501e718bf4aad9134
Parents: cc47933
Author: Navneet Potti <na...@gmail.com>
Authored: Wed Aug 10 16:41:46 2016 -0500
Committer: Navneet Potti <na...@gmail.com>
Committed: Wed Aug 10 16:41:46 2016 -0500
----------------------------------------------------------------------
storage/InsertDestination.cpp | 66 +++++++++++++++++++++++++++++
storage/InsertDestination.hpp | 16 +++++++
storage/InsertDestinationInterface.hpp | 22 ++++++++++
storage/StorageBlock.cpp | 6 ++-
storage/StorageBlock.hpp | 49 ++++++++++++++++++++-
storage/TupleStorageSubBlock.hpp | 46 ++++++++++++++++++++
6 files changed, 202 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/69cc69b0/storage/InsertDestination.cpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp
index 9897aed..6b20bb5 100644
--- a/storage/InsertDestination.cpp
+++ b/storage/InsertDestination.cpp
@@ -247,6 +247,72 @@ void InsertDestination::bulkInsertTuplesWithRemappedAttributes(
});
}
+// Bulk-inserts tuples whose columns are split across several ValueAccessors.
+void InsertDestination::bulkInsertTuplesFromValueAccessors(
+    std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map,
+    bool always_mark_full) {
+  // Handle pathological corner case where there are no accessors: there is
+  // nothing to insert, and accessor_attribute_map[0] below would be invalid.
+  if (accessor_attribute_map.empty()) {
+    return;
+  }
+
+  // First off, initialize iteration through each accessor
+  for (auto &p : accessor_attribute_map) {
+    p.first->beginIterationVirtual();
+  }
+
+  // We assume that all input accessors have the same number of tuples, so
+  // the iterations finish together. Therefore, we can just check the first one.
+  ValueAccessor *first_accessor = accessor_attribute_map[0].first;
+  while (!first_accessor->iterationFinishedVirtual()) {
+    tuple_id num_tuples_to_insert = kCatalogMaxID;
+    tuple_id num_tuples_inserted = 0;
+    MutableBlockReference output_block = this->getBlockForInsertion();
+
+    // Now iterate through all the accessors and do one round of bulk-insertion
+    // of partial tuples into the selected output_block.
+    // While inserting from the first ValueAccessor, space is reserved for
+    // all the columns including those coming from other ValueAccessors.
+    // Thereafter, in a given round, we only insert the remaining columns of the
+    // same tuples from the other ValueAccessors.
+    for (auto &p : accessor_attribute_map) {
+      ValueAccessor *accessor = p.first;
+      // Bind by reference: the map is only read, so no per-round copy is needed.
+      const std::vector<attribute_id> &attribute_map = p.second;
+
+      InvokeOnAnyValueAccessor(
+          accessor,
+          [&](auto *accessor) -> void {  // NOLINT(build/c++11)
+            num_tuples_inserted = output_block->bulkInsertPartialTuples(
+                attribute_map, accessor, num_tuples_to_insert);
+          });
+
+      if (accessor == first_accessor) {
+        // Now we know how many full tuples can be inserted into this
+        // output_block (viz. number of tuples inserted from first ValueAccessor).
+        // We should only insert that many tuples from the remaining
+        // ValueAccessors as well.
+        num_tuples_to_insert = num_tuples_inserted;
+      } else {
+        // Since the bulk insertion of the first ValueAccessor should already
+        // have reserved the space for all the other ValueAccessors' columns,
+        // we must have been able to insert all the tuples we asked to insert.
+        DCHECK(num_tuples_inserted == num_tuples_to_insert);
+      }
+    }
+
+    // After one round of insertions, we have successfully inserted as many
+    // tuples as possible into the output_block. Strictly speaking, it's
+    // possible that there is more space for insertions because the size
+    // estimation of variable length columns is conservative. But we will ignore
+    // that case and proceed assuming that this output_block is full.
+
+    // Update the header for output_block and then return it.
+    output_block->bulkInsertPartialTuplesFinalize(num_tuples_inserted);
+    const bool mark_full = always_mark_full
+                           || !first_accessor->iterationFinishedVirtual();
+    this->returnBlock(std::move(output_block), mark_full);
+  }
+}
+
+
void InsertDestination::insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
std::vector<Tuple>::const_iterator end) {
if (begin == end) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/69cc69b0/storage/InsertDestination.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp
index 3dae9a0..41d0c96 100644
--- a/storage/InsertDestination.hpp
+++ b/storage/InsertDestination.hpp
@@ -152,6 +152,10 @@ class InsertDestination : public InsertDestinationInterface {
ValueAccessor *accessor,
bool always_mark_full = false) override;
+ void bulkInsertTuplesFromValueAccessors(
+ std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map,
+ bool always_mark_full = false) override;
+
void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
std::vector<Tuple>::const_iterator end) override;
@@ -315,6 +319,12 @@ class AlwaysCreateBlockInsertDestination : public InsertDestination {
~AlwaysCreateBlockInsertDestination() override {
}
+  // Not implemented for this destination type: reports a fatal error if called.
+  // The parameter list must match InsertDestinationInterface exactly for
+  // 'override' to be valid.
+  void bulkInsertTuplesFromValueAccessors(
+      std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map,
+      bool always_mark_full = false) override {
+    FATAL_ERROR("bulkInsertTuplesFromValueAccessors is not implemented for AlwaysCreateBlockInsertDestination");
+  }
+
protected:
MutableBlockReference getBlockForInsertion() override;
@@ -519,6 +529,12 @@ class PartitionAwareInsertDestination : public InsertDestination {
ValueAccessor *accessor,
bool always_mark_full = false) override;
+  // Not implemented for this destination type: reports a fatal error if called.
+  // The parameter list must match InsertDestinationInterface exactly for
+  // 'override' to be valid.
+  void bulkInsertTuplesFromValueAccessors(
+      std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map,
+      bool always_mark_full = false) override {
+    FATAL_ERROR("bulkInsertTuplesFromValueAccessors is not implemented for PartitionAwareInsertDestination");
+  }
+
void insertTuplesFromVector(std::vector<Tuple>::const_iterator begin,
std::vector<Tuple>::const_iterator end) override;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/69cc69b0/storage/InsertDestinationInterface.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestinationInterface.hpp b/storage/InsertDestinationInterface.hpp
index 423dff1..cf1a35d 100644
--- a/storage/InsertDestinationInterface.hpp
+++ b/storage/InsertDestinationInterface.hpp
@@ -20,6 +20,7 @@
#ifndef QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_
#define QUICKSTEP_STORAGE_INSERT_DESTINATION_INTERFACE_HPP_
+#include <unordered_map>
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
@@ -122,6 +123,27 @@ class InsertDestinationInterface {
bool always_mark_full = false) = 0;
/**
+ * @brief Bulk-insert tuples from one or more ValueAccessors
+ * into blocks managed by this InsertDestination.
+ *
+ * @warning It is implicitly assumed that all the input ValueAccessors have
+ * the same number of tuples in them.
+ *
+ * @param accessor_attribute_map A vector of pairs of ValueAccessor and
+ * corresponding attribute map
+ * The i-th attribute ID in the attr map for a value accessor is "n"
+ * if the attribute_id "i" in the output relation
+ * is the attribute_id "n" in corresponding input value accessor.
+ * Set the i-th element to kInvalidCatalogId if it doesn't come from
+ * the corresponding value accessor.
+ * @param always_mark_full If \c true, always mark the blocks full after
+ * insertion from ValueAccessor even when partially full.
+ **/
+ virtual void bulkInsertTuplesFromValueAccessors(
+ std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map,
+ bool always_mark_full = false) = 0;
+
+ /**
* @brief Insert tuples from a range of Tuples in a vector.
* @warning Unlike bulkInsertTuples(), this is not well-optimized and not
* intended for general use. It should only be used by
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/69cc69b0/storage/StorageBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.cpp b/storage/StorageBlock.cpp
index 21aa12c..6493d55 100644
--- a/storage/StorageBlock.cpp
+++ b/storage/StorageBlock.cpp
@@ -269,10 +269,12 @@ tuple_id StorageBlock::bulkInsertTuples(ValueAccessor *accessor) {
tuple_id StorageBlock::bulkInsertTuplesWithRemappedAttributes(
const std::vector<attribute_id> &attribute_map,
- ValueAccessor *accessor) {
+ ValueAccessor *accessor,
+ const tuple_id max_tuples_to_insert) {
const tuple_id num_inserted
= tuple_store_->bulkInsertTuplesWithRemappedAttributes(attribute_map,
- accessor);
+ accessor,
+ max_tuples_to_insert);
if (num_inserted != 0) {
invalidateAllIndexes();
dirty_ = true;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/69cc69b0/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index 97b4773..1562f9d 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -306,13 +306,60 @@ class StorageBlock : public StorageBlockBase {
* iteration will be advanced to the first non-inserted tuple or, if
* all accessible tuples were inserted in this block, to the end
* position.
+ * @param max_tuples_to_insert Insert at most this many tuples.
* @return The number of tuples inserted from accessor.
**/
tuple_id bulkInsertTuplesWithRemappedAttributes(
const std::vector<attribute_id> &attribute_map,
- ValueAccessor *accessor);
+ ValueAccessor *accessor,
+ const tuple_id max_tuples_to_insert = kCatalogMaxID);
/**
+ * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
+ * as a single batch, using the attribute_map to project and reorder
+ * columns from the input ValueAccessor. Does not update header.
+ *
+ * @note Typical usage is where you want to bulk-insert columns from two
+ * or more value accessors. Instead of writing out the columns into
+ * one or more column vector value accessors, you can simply use this
+ * function with the appropriate attribute_map for each value
+ * accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
+ * handles all the details) to insert tuples without an extra temp copy.
+ *
+ * @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
+ * until which point, the insertion is not visible to others.
+ * @warning The inserted tuples may be placed in "incorrect" or
+ * sub-optimal locations in this TupleStorageSubBlock. The only
+ * methods which are safe to call between bulkInsertTuples() and
+ * rebuild() are insertTupleInBatch(), bulkInsertTuples(), and
+ * bulkInsertTuplesWithRemappedAttributes().
+ *
+ * @param attribute_map A vector which maps the attributes of this
+ * TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId)
+ * to the corresponding attributes which should be read from accessor.
+ * @param accessor A ValueAccessor to insert tuples from. The accessor's
+ * iteration will be advanced to the first non-inserted tuple or, if
+ * all accessible tuples were inserted in this sub-block, to the end
+ * position.
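+ * @param max_num_tuples_to_insert Upper bound on the number of tuples to
+ * insert from accessor in this call.
+ * @return The number of tuples inserted from accessor.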
+ **/
+ virtual tuple_id bulkInsertPartialTuples(
+ const std::vector<attribute_id> &attribute_map,
+ ValueAccessor *accessor,
+ tuple_id max_num_tuples_to_insert) = 0;
+
+ /**
+ * @brief Update header after a bulkInsertPartialTuples.
+ *
+ * @warning Only call this after a bulkInsertPartialTuples, passing in the
+ * number of tuples that were inserted (return value of that function).
+ *
+ * @param num_tuples_inserted Number of tuples inserted (i.e., how much to
+ * advance the header.num_tuples by). Should be equal to the return
+ * value of bulkInsertPartialTuples.
+ **/
+ virtual void bulkInsertPartialTuplesFinalize(tuple_id num_tuples_inserted) = 0;
+
+ /**
* @brief Perform a random sampling of data on the StorageBlock. The number
* of records sampled is determined by the sample percentage in case of
* tuple sample. For block sample all the records in a block are taken.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/69cc69b0/storage/TupleStorageSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/TupleStorageSubBlock.hpp b/storage/TupleStorageSubBlock.hpp
index aed6eea..f937080 100644
--- a/storage/TupleStorageSubBlock.hpp
+++ b/storage/TupleStorageSubBlock.hpp
@@ -272,6 +272,52 @@ class TupleStorageSubBlock {
ValueAccessor *accessor) = 0;
/**
+ * @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
+ * as a single batch, using the attribute_map to project and reorder
+ * columns from the input ValueAccessor. Does not update header.
+ *
+ * @note Typical usage is where you want to bulk-insert columns from two
+ * or more value accessors. Instead of writing out the columns into
+ * one or more column vector value accessors, you can simply use this
+ * function with the appropriate attribute_map for each value
+ * accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
+ * handles all the details) to insert tuples without an extra temp copy.
+ *
+ * @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
+ * until which point, the insertion is not visible to others.
+ * @warning The inserted tuples may be placed in "incorrect" or
+ * sub-optimal locations in this TupleStorageSubBlock. The only
+ * methods which are safe to call between bulkInsertTuples() and
+ * rebuild() are insertTupleInBatch(), bulkInsertTuples(), and
+ * bulkInsertTuplesWithRemappedAttributes().
+ *
+ * @param attribute_map A vector which maps the attributes of this
+ * TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId)
+ * to the corresponding attributes which should be read from accessor.
+ * @param accessor A ValueAccessor to insert tuples from. The accessor's
+ * iteration will be advanced to the first non-inserted tuple or, if
+ * all accessible tuples were inserted in this sub-block, to the end
+ * position.
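+ * @param max_num_tuples_to_insert Upper bound on the number of tuples to
+ * insert from accessor in this call.
+ * @return The number of tuples inserted from accessor.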
+ **/
+ virtual tuple_id bulkInsertPartialTuples(
+ const std::vector<attribute_id> &attribute_map,
+ ValueAccessor *accessor,
+ tuple_id max_num_tuples_to_insert) = 0;
+
+ /**
+ * @brief Update header after a bulkInsertPartialTuples.
+ *
+ * @warning Only call this after a bulkInsertPartialTuples, passing in the
+ * number of tuples that were inserted (return value of that function).
+ *
+ * @param num_tuples_inserted Number of tuples inserted (i.e., how much to
+ * advance the header.num_tuples by). Should be equal to the return
+ * value of bulkInsertPartialTuples.
+ **/
+ virtual void bulkInsertPartialTuplesFinalize(tuple_id num_tuples_inserted) = 0;
+
+ /**
* @brief Get the (untyped) value of an attribute in a tuple in this buffer.
* @warning This method may not be supported for all implementations of
* TupleStorageSubBlock. supportsUntypedGetAttributeValue() MUST be