You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/01/31 04:21:30 UTC

[1/2] incubator-quickstep git commit: Minor refactor for InsertDestinations. [Forced Update!]

Repository: incubator-quickstep
Updated Branches:
  refs/heads/refactor-inner-join 92fae47c5 -> 23e14b8e0 (forced update)


Minor refactor for InsertDestinations.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/f2e77266
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/f2e77266
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/f2e77266

Branch: refs/heads/refactor-inner-join
Commit: f2e77266edeaff38a60650b48836ff6ddb3b84ca
Parents: 0f4938c
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Jan 30 15:24:03 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Mon Jan 30 15:24:03 2017 -0800

----------------------------------------------------------------------
 storage/InsertDestination.cpp          | 17 ++++-------------
 storage/InsertDestination.hpp          |  4 +++-
 storage/InsertDestinationInterface.hpp |  2 +-
 storage/StorageBlock.hpp               |  2 +-
 4 files changed, 9 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestination.cpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.cpp b/storage/InsertDestination.cpp
index 944998f..714e6e5 100644
--- a/storage/InsertDestination.cpp
+++ b/storage/InsertDestination.cpp
@@ -290,7 +290,6 @@ void InsertDestination::bulkInsertTuplesFromValueAccessors(
       ValueAccessor *accessor = p.first;
       std::vector<attribute_id> attribute_map = p.second;
 
-
       InvokeOnAnyValueAccessor(
           accessor,
           [&](auto *accessor) -> void {  // NOLINT(build/c++11)
@@ -621,11 +620,10 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor,
        &always_mark_full,
        &num_partitions](auto *accessor) -> void {  // NOLINT(build/c++11)
     std::vector<std::unique_ptr<TupleIdSequence>> partition_membership;
-    partition_membership.resize(num_partitions);
 
     // Create a tuple-id sequence for each partition.
     for (std::size_t partition = 0; partition < num_partitions; ++partition) {
-      partition_membership[partition].reset(new TupleIdSequence(accessor->getEndPosition()));
+      partition_membership.emplace_back(std::make_unique<TupleIdSequence>(accessor->getEndPosition()));
     }
 
     // Iterate over ValueAccessor for each tuple,
@@ -641,9 +639,8 @@ void PartitionAwareInsertDestination::bulkInsertTuples(ValueAccessor *accessor,
     // TupleIdSequence.
     std::vector<std::unique_ptr<typename std::remove_pointer<
         decltype(accessor->createSharedTupleIdSequenceAdapter(*partition_membership.front()))>::type>> adapter;
-    adapter.resize(num_partitions);
     for (std::size_t partition = 0; partition < num_partitions; ++partition) {
-      adapter[partition].reset(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
+      adapter.emplace_back(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
     }
 
     // Bulk-insert into a block belonging to the partition.
@@ -678,11 +675,10 @@ void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes(
        &always_mark_full,
        &num_partitions](auto *accessor) -> void {  // NOLINT(build/c++11)
     std::vector<std::unique_ptr<TupleIdSequence>> partition_membership;
-    partition_membership.resize(num_partitions);
 
     // Create a tuple-id sequence for each partition.
     for (std::size_t partition = 0; partition < num_partitions; ++partition) {
-      partition_membership[partition].reset(new TupleIdSequence(accessor->getEndPosition()));
+      partition_membership.emplace_back(std::make_unique<TupleIdSequence>(accessor->getEndPosition()));
     }
 
     // Iterate over ValueAccessor for each tuple,
@@ -698,9 +694,8 @@ void PartitionAwareInsertDestination::bulkInsertTuplesWithRemappedAttributes(
     // TupleIdSequence.
     std::vector<std::unique_ptr<typename std::remove_pointer<
         decltype(accessor->createSharedTupleIdSequenceAdapter(*partition_membership.front()))>::type>> adapter;
-    adapter.resize(num_partitions);
     for (std::size_t partition = 0; partition < num_partitions; ++partition) {
-      adapter[partition].reset(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
+      adapter.emplace_back(accessor->createSharedTupleIdSequenceAdapter(*partition_membership[partition]));
     }
 
     // Bulk-insert into a block belonging to the partition.
@@ -742,10 +737,6 @@ void PartitionAwareInsertDestination::insertTuplesFromVector(std::vector<Tuple>:
   }
 }
 
-MutableBlockReference PartitionAwareInsertDestination::getBlockForInsertion() {
-  FATAL_ERROR("PartitionAwareInsertDestination::getBlockForInsertion needs a partition id as an argument.");
-}
-
 MutableBlockReference PartitionAwareInsertDestination::getBlockForInsertionInPartition(const partition_id part_id) {
   DCHECK_LT(part_id, partition_scheme_header_->getNumPartitions());
   SpinMutexLock lock(mutexes_for_partition_[part_id]);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestination.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestination.hpp b/storage/InsertDestination.hpp
index c3c40bd..6707192 100644
--- a/storage/InsertDestination.hpp
+++ b/storage/InsertDestination.hpp
@@ -539,7 +539,9 @@ class PartitionAwareInsertDestination : public InsertDestination {
                               std::vector<Tuple>::const_iterator end) override;
 
  protected:
-  MutableBlockReference getBlockForInsertion() override;
+  MutableBlockReference getBlockForInsertion() override {
+    LOG(FATAL) << "PartitionAwareInsertDestination::getBlockForInsertion needs a partition id as an argument.";
+  }
 
   /**
    * @brief Get a block to use for insertion from a partition.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/InsertDestinationInterface.hpp
----------------------------------------------------------------------
diff --git a/storage/InsertDestinationInterface.hpp b/storage/InsertDestinationInterface.hpp
index b62d3e5..be6b0c2 100644
--- a/storage/InsertDestinationInterface.hpp
+++ b/storage/InsertDestinationInterface.hpp
@@ -131,7 +131,7 @@ class InsertDestinationInterface {
    *
    * @param accessor_attribute_map A vector of pairs of ValueAccessor and
    *        corresponding attribute map
-   *        The i-th attribute ID in the attr map for a value accessor is "n" 
+   *        The i-th attribute ID in the attr map for a value accessor is "n"
    *        if the attribute_id "i" in the output relation
    *        is the attribute_id "n" in corresponding input value accessor.
    *        Set the i-th element to kInvalidCatalogId if it doesn't come from

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/f2e77266/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index ed252c5..16ea50f 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -325,7 +325,7 @@ class StorageBlock : public StorageBlockBase {
    *       function with the appropriate attribute_map for each value
    *       accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
    *       handles all the details) to insert tuples without an extra temp copy.
-   * 
+   *
    * @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
    *          until which point, the insertion is not visible to others.
    * @warning The inserted tuples may be placed in sub-optimal locations in this


[2/2] incubator-quickstep git commit: Minor refactor for HashJoinInnerJoin.

Posted by zu...@apache.org.
Minor refactor for HashJoinInnerJoin.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/23e14b8e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/23e14b8e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/23e14b8e

Branch: refs/heads/refactor-inner-join
Commit: 23e14b8e078f42a8d3e5f6c0c4885dee271d99aa
Parents: f2e7726
Author: Zuyu Zhang <zu...@apache.org>
Authored: Mon Jan 30 15:28:49 2017 -0800
Committer: Zuyu Zhang <zu...@apache.org>
Committed: Mon Jan 30 20:21:23 2017 -0800

----------------------------------------------------------------------
 relational_operators/CMakeLists.txt       |  1 +
 relational_operators/HashJoinOperator.cpp | 42 ++++++++++++++------------
 2 files changed, 23 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/23e14b8e/relational_operators/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt
index c1caaa3..b2e08cf 100644
--- a/relational_operators/CMakeLists.txt
+++ b/relational_operators/CMakeLists.txt
@@ -199,6 +199,7 @@ target_link_libraries(quickstep_relationaloperators_FinalizeAggregationOperator
 target_link_libraries(quickstep_relationaloperators_HashJoinOperator
                       ${GFLAGS_LIB_NAME}
                       glog
+                      quickstep_catalog_CatalogAttribute
                       quickstep_catalog_CatalogRelation
                       quickstep_catalog_CatalogRelationSchema
                       quickstep_catalog_CatalogTypedefs

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/23e14b8e/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index fd3841f..7394554 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -25,6 +25,7 @@
 #include <utility>
 #include <vector>
 
+#include "catalog/CatalogAttribute.hpp"
 #include "catalog/CatalogRelation.hpp"
 #include "catalog/CatalogRelationSchema.hpp"
 #include "catalog/CatalogTypedefs.hpp"
@@ -165,6 +166,12 @@ class OuterJoinTupleCollector {
   TupleIdSequence *filter_;
 };
 
+// For InnerJoin.
+constexpr std::size_t kNumValueAccessors = 3u;
+constexpr std::size_t kBuildValueAccessorIndex = 0,
+                      kProbeValueAccessorIndex = 1u,
+                      kTempResultValueAccessorIndex = 2u;
+
 }  // namespace
 
 bool HashJoinOperator::getAllWorkOrders(
@@ -565,31 +572,27 @@ void HashInnerJoinWorkOrder::execute() {
         });
     }
 
-
     // We also need a temp value accessor to store results of any scalar expressions.
     ColumnVectorsValueAccessor temp_result;
 
     // Create a map of ValueAccessors and what attributes we want to pick from them
-    std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map;
-    const std::vector<ValueAccessor *> accessors{
-        ordered_build_accessor.get(), ordered_probe_accessor.get(), &temp_result};
-    const unsigned int build_index = 0, probe_index = 1, temp_index = 2;
-    for (auto &accessor : accessors) {
-      accessor_attribute_map.push_back(std::make_pair(
-          accessor,
-          std::vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
-    }
+    std::vector<std::pair<ValueAccessor *, std::vector<attribute_id>>> accessor_attribute_map(
+        kNumValueAccessors, std::make_pair(nullptr,  // A late binding ValueAccessor.
+                                           vector<attribute_id>(selection_.size(), kInvalidCatalogId)));
 
-    attribute_id dest_attr = 0;
-    std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
+    accessor_attribute_map[kBuildValueAccessorIndex].first = ordered_build_accessor.get();
+    accessor_attribute_map[kProbeValueAccessorIndex].first = ordered_probe_accessor.get();
+    accessor_attribute_map[kTempResultValueAccessorIndex].first = &temp_result;
 
+    attribute_id dest_attr = 0;
     for (auto &selection_cit : selection_) {
       // If the Scalar (column) is not an attribute in build/probe blocks, then
       // insert it into a ColumnVectorsValueAccessor.
       if (selection_cit->getDataSource() != Scalar::ScalarDataSource::kAttribute) {
         // Current destination attribute maps to the column we'll create now.
-        accessor_attribute_map[temp_index].second[dest_attr] = temp_result.getNumColumns();
+        accessor_attribute_map[kTempResultValueAccessorIndex].second[dest_attr] = temp_result.getNumColumns();
 
+        std::vector<std::pair<tuple_id, tuple_id>> zipped_joined_tuple_ids;
         if (temp_result.getNumColumns() == 0) {
           // The getAllValuesForJoin function below needs joined tuple IDs as
           // a vector of pair of (build-tuple-ID, probe-tuple-ID), and we have
@@ -599,9 +602,8 @@ void HashInnerJoinWorkOrder::execute() {
           // they don't have scalar expressions with attributes from both
           // build and probe relations (other expressions would have been
           // pushed down to before the join).
-          zipped_joined_tuple_ids.reserve(build_tids.size());
           for (std::size_t i = 0; i < build_tids.size(); ++i) {
-            zipped_joined_tuple_ids.push_back(std::make_pair(build_tids[i], probe_tids[i]));
+            zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
           }
         }
         temp_result.addColumn(
@@ -610,12 +612,12 @@ void HashInnerJoinWorkOrder::execute() {
                                       probe_relation_id, probe_accessor.get(),
                                       zipped_joined_tuple_ids));
       } else {
-        auto scalar_attr = static_cast<const ScalarAttribute *>(selection_cit.get());
-        const attribute_id attr_id = scalar_attr->getAttribute().getID();
-        if (scalar_attr->getAttribute().getParent().getID() == build_relation_id) {
-          accessor_attribute_map[build_index].second[dest_attr] = attr_id;
+        const CatalogAttribute &attr = static_cast<const ScalarAttribute *>(selection_cit.get())->getAttribute();
+        const attribute_id attr_id = attr.getID();
+        if (attr.getParent().getID() == build_relation_id) {
+          accessor_attribute_map[kBuildValueAccessorIndex].second[dest_attr] = attr_id;
         } else {
-          accessor_attribute_map[probe_index].second[dest_attr] = attr_id;
+          accessor_attribute_map[kProbeValueAccessorIndex].second[dest_attr] = attr_id;
         }
       }
       ++dest_attr;