You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by ji...@apache.org on 2016/07/05 22:15:57 UTC
incubator-quickstep git commit: Initial commit
Repository: incubator-quickstep
Updated Branches:
refs/heads/stable-query-plans-ssb [created] 14f726e1d
Initial commit
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/14f726e1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/14f726e1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/14f726e1
Branch: refs/heads/stable-query-plans-ssb
Commit: 14f726e1dc3af81e9aa6aeb068b89d2cc2977d62
Parents: 31f1bbb
Author: Jianqiao Zhu <ji...@cs.wisc.edu>
Authored: Tue Jul 5 17:14:40 2016 -0500
Committer: Jianqiao Zhu <ji...@cs.wisc.edu>
Committed: Tue Jul 5 17:15:01 2016 -0500
----------------------------------------------------------------------
compression/CompressionDictionaryLite.hpp | 42 ++++++
query_optimizer/ExecutionHeuristics.cpp | 17 +--
query_optimizer/ExecutionHeuristics.hpp | 2 +-
storage/BasicColumnStoreValueAccessor.hpp | 26 +++-
storage/CMakeLists.txt | 1 +
storage/CompressedColumnStoreValueAccessor.hpp | 22 ++++
.../CompressedPackedRowStoreValueAccessor.hpp | 22 ++++
storage/HashTable.hpp | 28 ++--
storage/PackedRowStoreValueAccessor.hpp | 25 +++-
storage/SplitRowStoreValueAccessor.hpp | 45 +++++++
storage/ValueAccessor.hpp | 36 ++++++
types/containers/ColumnVector.hpp | 35 +++++
types/containers/ColumnVectorsValueAccessor.hpp | 17 +++
utility/BloomFilterAdapter.hpp | 128 +++++++++++++++++++
utility/CMakeLists.txt | 6 +
15 files changed, 413 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/compression/CompressionDictionaryLite.hpp
----------------------------------------------------------------------
diff --git a/compression/CompressionDictionaryLite.hpp b/compression/CompressionDictionaryLite.hpp
index 45019c0..8c7741f 100644
--- a/compression/CompressionDictionaryLite.hpp
+++ b/compression/CompressionDictionaryLite.hpp
@@ -174,6 +174,15 @@ class CompressionDictionaryLite {
}
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthForCode(const std::uint32_t code) const {
+ if (type_is_variable_length_) {
+ return variableLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code);
+ } else {
+ return fixedLengthGetUntypedValueAndByteLengthHelper<std::uint32_t, check_null>(code);
+ }
+ }
+
/**
* @brief Get the value represented by the specified code as a TypedValue.
* @note This version is for codes of 8 bits or less. Also see
@@ -255,6 +264,39 @@ class CompressionDictionaryLite {
return retval;
}
+ template <typename CodeType, bool check_null = true>
+ inline std::pair<const void*, std::size_t> fixedLengthGetUntypedValueAndByteLengthHelper(
+ const CodeType code) const {
+ if (check_null && (code == getNullCode())) {
+ return std::make_pair(nullptr, 0);
+ }
+ DCHECK_LT(code, numberOfCodes());
+ return std::make_pair(static_cast<const char*>(dictionary_memory_)
+ + 2 * sizeof(std::uint32_t) // Header.
+ + code * type_fixed_byte_length_, // Index into value array.
+ type_fixed_byte_length_);
+ }
+
+ template <typename CodeType, bool check_null = true>
+ inline std::pair<const void*, std::size_t> variableLengthGetUntypedValueAndByteLengthHelper(
+ const CodeType code) const {
+ if (check_null && (code == getNullCode())) {
+ return std::make_pair(nullptr, 0);
+ }
+ DCHECK_LT(code, numberOfCodes());
+
+ const std::uint32_t value_offset = static_cast<const std::uint32_t*>(dictionary_memory_)[code + 2];
+ const void *data_ptr = variable_length_data_region_ + value_offset;
+ DCHECK_LT(data_ptr, static_cast<const char*>(dictionary_memory_) + dictionary_memory_size_);
+
+ std::size_t data_size = (code == *static_cast<const std::uint32_t*>(dictionary_memory_) - 1) ?
+ (static_cast<const char*>(dictionary_memory_)
+ + dictionary_memory_size_
+ - static_cast<const char*>(data_ptr))
+ : (static_cast<const std::uint32_t*>(dictionary_memory_)[code + 3] - value_offset);
+ return std::make_pair(data_ptr, data_size);
+ }
+
template <typename CodeType>
inline TypedValue fixedLengthGetTypedValueHelper(const CodeType code) const {
if (code == getNullCode()) {
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
index fc31c53..1f2163e 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -107,20 +107,9 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
const CatalogRelation *relation) {
- const std::size_t cardinality = relation->estimateTupleCardinality();
- if (cardinality < kOneThousand) {
- bloom_filter_proto->set_bloom_filter_size(kOneThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kVeryLowSparsityHash);
- } else if (cardinality < kTenThousand) {
- bloom_filter_proto->set_bloom_filter_size(kTenThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kLowSparsityHash);
- } else if (cardinality < kHundredThousand) {
- bloom_filter_proto->set_bloom_filter_size(kHundredThousand / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kMediumSparsityHash);
- } else {
- bloom_filter_proto->set_bloom_filter_size(kMillion / kCompressionFactor);
- bloom_filter_proto->set_number_of_hashes(kHighSparsityHash);
- }
+ const std::size_t cardinality = relation->getStatistics().getNumTuples();
+ bloom_filter_proto->set_bloom_filter_size(cardinality);
+ bloom_filter_proto->set_number_of_hashes(3);
}
} // namespace optimizer
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp b/query_optimizer/ExecutionHeuristics.hpp
index 92a7fe8..c43d591 100644
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ b/query_optimizer/ExecutionHeuristics.hpp
@@ -49,7 +49,7 @@ class ExecutionHeuristics {
static const std::size_t kHundredThousand = 100000;
static const std::size_t kMillion = 1000000;
- static const std::size_t kCompressionFactor = 10;
+ static const std::size_t kCompressionFactor = 1;
static const std::size_t kVeryLowSparsityHash = 1;
static const std::size_t kLowSparsityHash = 2;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/BasicColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/BasicColumnStoreValueAccessor.hpp b/storage/BasicColumnStoreValueAccessor.hpp
index 759e187..7907fd5 100644
--- a/storage/BasicColumnStoreValueAccessor.hpp
+++ b/storage/BasicColumnStoreValueAccessor.hpp
@@ -18,6 +18,8 @@
#ifndef QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_
#define QUICKSTEP_STORAGE_BASIC_COLUMN_STORE_VALUE_ACCESSOR_HPP_
+#include <cstddef>
+#include <utility>
#include <vector>
#include "catalog/CatalogRelationSchema.hpp"
@@ -43,7 +45,8 @@ class BasicColumnStoreValueAccessorHelper {
: relation_(relation),
num_tuples_(num_tuples),
column_stripes_(column_stripes),
- column_null_bitmaps_(column_null_bitmaps) {
+ column_null_bitmaps_(column_null_bitmaps),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()) {
}
inline tuple_id numPackedTuples() const {
@@ -61,9 +64,23 @@ class BasicColumnStoreValueAccessorHelper {
return nullptr;
}
- // TODO(chasseur): Consider cacheing the byte lengths of attributes.
- return static_cast<const char*>(column_stripes_[attr])
- + (tuple * relation_.getAttributeById(attr)->getType().maximumByteLength());
+ return static_cast<const char*>(column_stripes_[attr]) + (tuple * attr_max_lengths_[attr]);
+ }
+
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ DEBUG_ASSERT(tuple < num_tuples_);
+ DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
+ if (check_null
+ && (!column_null_bitmaps_.elementIsNull(attr))
+ && column_null_bitmaps_[attr].getBit(tuple)) {
+ return std::make_pair(nullptr, 0);
+ }
+
+ const std::size_t attr_length = attr_max_lengths_[attr];
+ return std::make_pair(static_cast<const char*>(column_stripes_[attr]) + (tuple * attr_length),
+ attr_length);
}
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
@@ -80,6 +97,7 @@ class BasicColumnStoreValueAccessorHelper {
const tuple_id num_tuples_;
const std::vector<void*> &column_stripes_;
const PtrVector<BitVector<false>, true> &column_null_bitmaps_;
+ const std::vector<std::size_t> &attr_max_lengths_;
DISALLOW_COPY_AND_ASSIGN(BasicColumnStoreValueAccessorHelper);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index b536411..cf6409c 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -669,6 +669,7 @@ target_link_libraries(quickstep_storage_HashTable
quickstep_types_Type
quickstep_types_TypedValue
quickstep_utility_BloomFilter
+ quickstep_utility_BloomFilterAdapter
quickstep_utility_HashPair
quickstep_utility_Macros)
target_link_libraries(quickstep_storage_HashTableBase
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/CompressedColumnStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedColumnStoreValueAccessor.hpp b/storage/CompressedColumnStoreValueAccessor.hpp
index 64eb315..984dea3 100644
--- a/storage/CompressedColumnStoreValueAccessor.hpp
+++ b/storage/CompressedColumnStoreValueAccessor.hpp
@@ -52,6 +52,7 @@ class CompressedColumnStoreValueAccessorHelper {
const PtrVector<BitVector<false>, true> &uncompressed_column_null_bitmaps)
: relation_(relation),
num_tuples_(num_tuples),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
compression_info_(compression_info),
dictionary_coded_attributes_(dictionary_coded_attributes),
truncated_attributes_(truncated_attributes),
@@ -84,6 +85,26 @@ class CompressedColumnStoreValueAccessorHelper {
}
}
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ if (dictionary_coded_attributes_[attr]) {
+ return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>(
+ getCode(tuple, attr));
+ } else if (truncated_attributes_[attr]) {
+ if (truncated_attribute_is_int_[attr]) {
+ int_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&int_buffer_, sizeof(int_buffer_));
+ } else {
+ long_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&long_buffer_, sizeof(long_buffer_));
+ }
+ } else {
+ return std::make_pair(getAttributePtr<check_null>(tuple, attr),
+ attr_max_lengths_[attr]);
+ }
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
if (dictionary_coded_attributes_[attr]) {
@@ -138,6 +159,7 @@ class CompressedColumnStoreValueAccessorHelper {
const CatalogRelationSchema &relation_;
const tuple_id num_tuples_;
+ const std::vector<std::size_t> &attr_max_lengths_;
const CompressedBlockInfo &compression_info_;
const std::vector<bool> &dictionary_coded_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/CompressedPackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/CompressedPackedRowStoreValueAccessor.hpp b/storage/CompressedPackedRowStoreValueAccessor.hpp
index 024b0ec..7058aec 100644
--- a/storage/CompressedPackedRowStoreValueAccessor.hpp
+++ b/storage/CompressedPackedRowStoreValueAccessor.hpp
@@ -58,6 +58,7 @@ class CompressedPackedRowStoreValueAccessorHelper {
num_tuples_(num_tuples),
tuple_length_bytes_(tuple_length_bytes),
attribute_offsets_(attribute_offsets),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
compression_info_(compression_info),
dictionary_coded_attributes_(dictionary_coded_attributes),
truncated_attributes_(truncated_attributes),
@@ -92,6 +93,26 @@ class CompressedPackedRowStoreValueAccessorHelper {
}
}
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ if (dictionary_coded_attributes_[attr]) {
+ return dictionaries_.atUnchecked(attr).getUntypedValueAndByteLengthForCode<check_null>(
+ getCode(tuple, attr));
+ } else if (truncated_attributes_[attr]) {
+ if (truncated_attribute_is_int_[attr]) {
+ int_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&int_buffer_, sizeof(int_buffer_));
+ } else {
+ long_buffer_ = getCode(tuple, attr);
+ return std::make_pair(&long_buffer_, sizeof(long_buffer_));
+ }
+ } else {
+ return std::make_pair(getAttributePtr<check_null>(tuple, attr),
+ attr_max_lengths_[attr]);
+ }
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
if (dictionary_coded_attributes_[attr]) {
@@ -150,6 +171,7 @@ class CompressedPackedRowStoreValueAccessorHelper {
const tuple_id num_tuples_;
const std::size_t tuple_length_bytes_;
const std::vector<std::size_t> &attribute_offsets_;
+ const std::vector<std::size_t> &attr_max_lengths_;
const CompressedBlockInfo &compression_info_;
const std::vector<bool> &dictionary_coded_attributes_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index be31fd9..740e678 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -23,6 +23,7 @@
#include <atomic>
#include <cstddef>
#include <cstdlib>
+#include <memory>
#include <type_traits>
#include <vector>
@@ -39,6 +40,7 @@
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "utility/BloomFilter.hpp"
+#include "utility/BloomFilterAdapter.hpp"
#include "utility/HashPair.hpp"
#include "utility/Macros.hpp"
@@ -2246,26 +2248,14 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
InvokeOnAnyValueAccessor(
accessor,
[&](auto *accessor) -> void { // NOLINT(build/c++11)
+ std::unique_ptr<BloomFilterAdapter> bloom_filter_adapter;
+ if (has_probe_side_bloom_filter_) {
+ bloom_filter_adapter.reset(
+ new BloomFilterAdapter(probe_bloom_filters_, probe_attribute_ids_));
+ }
while (accessor->next()) {
- // Probe any bloom filters, if enabled.
- if (has_probe_side_bloom_filter_) {
- DCHECK_EQ(probe_bloom_filters_.size(), probe_attribute_ids_.size());
- // Check if the key is contained in the BloomFilters or not.
- bool bloom_miss = false;
- for (std::size_t i = 0; i < probe_bloom_filters_.size() && !bloom_miss; ++i) {
- const BloomFilter *bloom_filter = probe_bloom_filters_[i];
- for (const attribute_id &attr_id : probe_attribute_ids_[i]) {
- TypedValue bloom_key = accessor->getTypedValue(attr_id);
- if (!bloom_filter->contains(static_cast<const std::uint8_t*>(bloom_key.getDataPtr()),
- bloom_key.getDataSize())) {
- bloom_miss = true;
- break;
- }
- }
- }
- if (bloom_miss) {
- continue; // On a bloom filter miss, probing the hash table can be skipped.
- }
+ if (has_probe_side_bloom_filter_ && bloom_filter_adapter->miss(accessor)) {
+ continue; // On a bloom filter miss, probing the hash table can be skipped.
}
TypedValue key = accessor->getTypedValue(key_attr_id);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/PackedRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/PackedRowStoreValueAccessor.hpp b/storage/PackedRowStoreValueAccessor.hpp
index 03a975e..cbd273e 100644
--- a/storage/PackedRowStoreValueAccessor.hpp
+++ b/storage/PackedRowStoreValueAccessor.hpp
@@ -18,6 +18,8 @@
#ifndef QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_
#define QUICKSTEP_STORAGE_PACKED_ROW_STORE_VALUE_ACCESSOR_HPP_
+#include <utility>
+
#include "catalog/CatalogRelationSchema.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "storage/StorageBlockInfo.hpp"
@@ -40,7 +42,8 @@ class PackedRowStoreValueAccessorHelper {
: relation_(relation),
num_tuples_(num_tuples),
tuple_storage_(tuple_storage),
- null_bitmap_(null_bitmap) {
+ null_bitmap_(null_bitmap),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()) {
}
inline tuple_id numPackedTuples() const {
@@ -65,6 +68,25 @@ class PackedRowStoreValueAccessorHelper {
+ relation_.getFixedLengthAttributeOffset(attr); // Attribute offset within tuple.
}
+ template <bool check_null>
+ inline std::pair<const void*, std::size_t> getAttributeValueAndByteLength(const tuple_id tuple,
+ const attribute_id attr) const {
+ DEBUG_ASSERT(tuple < num_tuples_);
+ DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
+ if (check_null) {
+ const int nullable_idx = relation_.getNullableAttributeIndex(attr);
+ if ((nullable_idx != -1)
+ && null_bitmap_->getBit(tuple * relation_.numNullableAttributes() + nullable_idx)) {
+ return std::make_pair(nullptr, 0);
+ }
+ }
+
+ return std::make_pair(static_cast<const char*>(tuple_storage_)
+ + (tuple * relation_.getFixedByteLength())
+ + relation_.getFixedLengthAttributeOffset(attr),
+ attr_max_lengths_[attr]);
+ }
+
inline TypedValue getAttributeValueTyped(const tuple_id tuple,
const attribute_id attr) const {
const Type &attr_type = relation_.getAttributeById(attr)->getType();
@@ -79,6 +101,7 @@ class PackedRowStoreValueAccessorHelper {
const tuple_id num_tuples_;
const void *tuple_storage_;
const BitVector<false> *null_bitmap_;
+ const std::vector<std::size_t> &attr_max_lengths_;
DISALLOW_COPY_AND_ASSIGN(PackedRowStoreValueAccessorHelper);
};
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/SplitRowStoreValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/SplitRowStoreValueAccessor.hpp b/storage/SplitRowStoreValueAccessor.hpp
index 9ea1a3a..19937f2 100644
--- a/storage/SplitRowStoreValueAccessor.hpp
+++ b/storage/SplitRowStoreValueAccessor.hpp
@@ -100,6 +100,11 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_position_);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_position_);
}
@@ -140,6 +145,44 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
}
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ DEBUG_ASSERT(occupancy_bitmap_.getBit(tid));
+ DEBUG_ASSERT(relation_.hasAttributeWithId(attr_id));
+ const char *tuple_slot = static_cast<const char*>(tuple_storage_)
+ + tuple_slot_bytes_ * tid;
+ if (check_null) {
+ const int nullable_idx = relation_.getNullableAttributeIndex(attr_id);
+ if (nullable_idx != -1) {
+ // const_cast is safe here. We will only be using read-only methods of
+ // BitVector.
+ BitVector<true> tuple_null_bitmap(const_cast<void*>(static_cast<const void*>(tuple_slot)),
+ relation_.numNullableAttributes());
+ if (tuple_null_bitmap.getBit(nullable_idx)) {
+ return std::make_pair(nullptr, 0);
+ }
+ }
+ }
+
+ const int variable_length_idx = relation_.getVariableLengthAttributeIndex(attr_id);
+ if (variable_length_idx == -1) {
+ // Fixed-length, stored in-line in slot.
+ return std::make_pair(tuple_slot + per_tuple_null_bitmap_bytes_
+ + relation_.getFixedLengthAttributeOffset(attr_id),
+ attr_max_lengths_[attr_id]);
+
+ } else {
+ // Variable-length, stored at back of block.
+ const std::uint32_t *pos_ptr = reinterpret_cast<const std::uint32_t*>(
+ tuple_slot + per_tuple_null_bitmap_bytes_
+ + relation_.getFixedByteLength()
+ + variable_length_idx * 2 * sizeof(std::uint32_t));
+ return std::make_pair(static_cast<const char*>(tuple_storage_) + pos_ptr[0],
+ pos_ptr[1]);
+ }
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
DEBUG_ASSERT(occupancy_bitmap_.getBit(tid));
@@ -317,6 +360,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
tuple_storage_(tuple_storage),
tuple_slot_bytes_(tuple_slot_bytes),
per_tuple_null_bitmap_bytes_(per_tuple_null_bitmap_bytes),
+ attr_max_lengths_(relation.getMaximumAttributeByteLengths()),
current_position_(std::numeric_limits<std::size_t>::max()) {
}
@@ -327,6 +371,7 @@ class SplitRowStoreValueAccessor : public ValueAccessor {
const void *tuple_storage_;
const std::size_t tuple_slot_bytes_;
const std::size_t per_tuple_null_bitmap_bytes_;
+ const std::vector<std::size_t> &attr_max_lengths_;
std::size_t current_position_;
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/storage/ValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessor.hpp b/storage/ValueAccessor.hpp
index e2a898e..e9370cc 100644
--- a/storage/ValueAccessor.hpp
+++ b/storage/ValueAccessor.hpp
@@ -375,6 +375,11 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
return accessor_->template getUntypedValueAtAbsolutePosition<check_null>(attr_id, *current_position_);
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, *current_position_);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, *current_position_);
}
@@ -387,6 +392,13 @@ class TupleIdSequenceAdapterValueAccessor : public ValueAccessor {
}
// Pass-through.
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, tid);
+ }
+
+ // Pass-through.
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, tid);
@@ -560,6 +572,12 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
id_sequence_[current_position_]);
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return accessor_->template getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(
+ attr_id, id_sequence_[current_position_]);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return accessor_->getTypedValueAtAbsolutePosition(attr_id, id_sequence_[current_position_]);
}
@@ -571,6 +589,13 @@ class OrderedTupleIdSequenceAdapterValueAccessor : public ValueAccessor {
"OrderedTupleIdSequenceAdapterValueAccessor");
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ FATAL_ERROR("getUntypedValueAndByteLengthAtAbsolutePosition() not implemented in "
+ "OrderedTupleIdSequenceAdapterValueAccessor");
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
FATAL_ERROR("getTypedValueAtAbsolutePosition() not implemented in "
@@ -737,6 +762,11 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_tuple_);
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_tuple_);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_tuple_);
}
@@ -747,6 +777,12 @@ class PackedTupleStorageSubBlockValueAccessor : public ValueAccessor {
return helper_.template getAttributeValue<check_null>(tid, attr_id);
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ return helper_.template getAttributeValueAndByteLength<check_null>(tid, attr_id);
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
return helper_.getAttributeValueTyped(tid, attr_id);
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/types/containers/ColumnVector.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVector.hpp b/types/containers/ColumnVector.hpp
index 76968ba..a9349ee 100644
--- a/types/containers/ColumnVector.hpp
+++ b/types/containers/ColumnVector.hpp
@@ -193,6 +193,22 @@ class NativeColumnVector : public ColumnVector {
}
/**
+ * @brief Get the untyped pointer to a value as well as the value's byte length
+ * in this NativeColumnVector as a pair.
+ *
+ * @param position The position of the value to get.
+ * @return A pair containing the untyped pointer to the value at position and
+ * the value's byte length.
+ **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const {
+ DCHECK_LT(position, actual_length_);
+ return (check_null && null_bitmap_ && null_bitmap_->getBit(position))
+ ? std::make_pair(nullptr, 0)
+ : std::make_pair(static_cast<const char*>(values_) + (position * type_length_), type_length_);
+ }
+
+ /**
* @brief Get a value in this NativeColumnVector as a TypedValue.
*
* @param position The position of the value to get.
@@ -453,6 +469,25 @@ class IndirectColumnVector : public ColumnVector {
}
/**
+ * @brief Get the untyped pointer to a value as well as the value's byte length
+ * in this IndirectColumnVector as a pair.
+ *
+ * @param position The position of the value to get.
+ * @return A pair containing the untyped pointer to the value at position and
+ * the value's byte length.
+ **/
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const std::size_t position) const {
+ DCHECK_LT(position, values_.size());
+ if (check_null && type_is_nullable_ && values_[position].isNull()) {
+ return std::make_pair(nullptr, 0);
+ } else {
+ const TypedValue &value = values_[position];
+ return std::make_pair(value.getDataPtr(), value.getDataSize());
+ }
+ }
+
+ /**
* @brief Get a value in this IndirectColumnVector as a TypedValue.
*
* @param position The position of the value to get.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/types/containers/ColumnVectorsValueAccessor.hpp
----------------------------------------------------------------------
diff --git a/types/containers/ColumnVectorsValueAccessor.hpp b/types/containers/ColumnVectorsValueAccessor.hpp
index f1d29a2..d69d1d8 100644
--- a/types/containers/ColumnVectorsValueAccessor.hpp
+++ b/types/containers/ColumnVectorsValueAccessor.hpp
@@ -124,6 +124,11 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
return getUntypedValueAtAbsolutePosition<check_null>(attr_id, current_position_);
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLength(const attribute_id attr_id) const {
+ return getUntypedValueAndByteLengthAtAbsolutePosition<check_null>(attr_id, current_position_);
+ }
+
inline TypedValue getTypedValue(const attribute_id attr_id) const {
return getTypedValueAtAbsolutePosition(attr_id, current_position_);
}
@@ -140,6 +145,18 @@ class ColumnVectorsValueAccessor : public ValueAccessor {
}
}
+ template <bool check_null = true>
+ inline std::pair<const void*, std::size_t> getUntypedValueAndByteLengthAtAbsolutePosition(const attribute_id attr_id,
+ const tuple_id tid) const {
+ DCHECK(attributeIdInRange(attr_id));
+ DCHECK(tupleIdInRange(tid));
+ if (column_native_[attr_id]) {
+ return static_cast<const NativeColumnVector&>(*columns_[attr_id]).getUntypedValueAndByteLength<check_null>(tid);
+ } else {
+ return static_cast<const IndirectColumnVector&>(*columns_[attr_id]).getUntypedValueAndByteLength<check_null>(tid);
+ }
+ }
+
inline TypedValue getTypedValueAtAbsolutePosition(const attribute_id attr_id,
const tuple_id tid) const {
DCHECK(attributeIdInRange(attr_id));
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/utility/BloomFilterAdapter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilterAdapter.hpp b/utility/BloomFilterAdapter.hpp
new file mode 100644
index 0000000..5deb275
--- /dev/null
+++ b/utility/BloomFilterAdapter.hpp
@@ -0,0 +1,128 @@
+/**
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
+#define QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/BloomFilter.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ * @{
+ */
+
+class BloomFilterAdapter {
+ public:
+ BloomFilterAdapter(const std::vector<const BloomFilter*> &bloom_filters,
+ const std::vector<std::vector<attribute_id>> &attribute_ids)
+ : num_bloom_filters_(bloom_filters.size()) {
+ DCHECK_EQ(bloom_filters.size(), attribute_ids.size());
+
+ bloom_filter_entries_.reserve(num_bloom_filters_);
+ bloom_filter_entry_indices_.reserve(num_bloom_filters_);
+
+ for (std::size_t i = 0; i < num_bloom_filters_; ++i) {
+ bloom_filter_entries_.emplace_back(bloom_filters[i], attribute_ids[i]);
+ bloom_filter_entry_indices_.emplace_back(i);
+ }
+ }
+
+ template <typename ValueAccessorT>
+ inline bool miss(const ValueAccessorT *accessor) {
+ return missImpl<ValueAccessorT, true>(accessor);
+ }
+
+ template <typename ValueAccessorT, bool adapt_filters>
+ inline bool missImpl(const ValueAccessorT *accessor) {
+ for (std::size_t i = 0; i < num_bloom_filters_; ++i) {
+ const std::size_t entry_idx = bloom_filter_entry_indices_[i];
+ BloomFilterEntry &entry = bloom_filter_entries_[entry_idx];
+ if (adapt_filters) {
+ ++entry.cnt;
+ }
+
+ const BloomFilter *bloom_filter = entry.bloom_filter;
+ for (const attribute_id &attr_id : entry.attribute_ids) {
+ const std::pair<const void*, std::size_t> value_and_byte_length =
+ accessor->getUntypedValueAndByteLength(attr_id);
+ if (!bloom_filter->contains(static_cast<const std::uint8_t*>(value_and_byte_length.first),
+ value_and_byte_length.second)) {
+ if (adapt_filters) {
+ // Record miss
+ ++entry.miss;
+
+ // Update entry order
+ if (i > 0) {
+ const std::size_t prev_entry_idx = bloom_filter_entry_indices_[i-1];
+ if (entry.isBetterThan(bloom_filter_entries_[prev_entry_idx])) {
+ bloom_filter_entry_indices_[i-1] = entry_idx;
+ bloom_filter_entry_indices_[i] = prev_entry_idx;
+ }
+ }
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ private:
+ struct BloomFilterEntry {
+ BloomFilterEntry(const BloomFilter *in_bloom_filter,
+ const std::vector<attribute_id> &in_attribute_ids)
+ : bloom_filter(in_bloom_filter),
+ attribute_ids(in_attribute_ids),
+ miss(0),
+ cnt(0) {
+ }
+
+ inline bool isBetterThan(const BloomFilterEntry& other) {
+ return static_cast<std::uint64_t>(miss) * other.cnt
+ > static_cast<std::uint64_t>(cnt + 5) * (other.miss + 5);
+ }
+
+ const BloomFilter *bloom_filter;
+ const std::vector<attribute_id> &attribute_ids;
+ std::uint32_t miss;
+ std::uint32_t cnt;
+ };
+
+ const std::size_t num_bloom_filters_;
+ std::vector<BloomFilterEntry> bloom_filter_entries_;
+ std::vector<std::size_t> bloom_filter_entry_indices_;
+
+ DISALLOW_COPY_AND_ASSIGN(BloomFilterAdapter);
+};
+
+/** @} */
+
+} // namespace quickstep
+
+#endif // QUICKSTEP_UTILITY_BLOOM_FILTER_ADAPTER_HPP
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/14f726e1/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 2d3db8f..378c4c9 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -159,6 +159,7 @@ add_library(quickstep_utility_Alignment ../empty_src.cpp Alignment.hpp)
add_library(quickstep_utility_BitManipulation ../empty_src.cpp BitManipulation.hpp)
add_library(quickstep_utility_BitVector ../empty_src.cpp BitVector.hpp)
add_library(quickstep_utility_BloomFilter ../empty_src.cpp BloomFilter.hpp)
+add_library(quickstep_utility_BloomFilterAdapter ../empty_src.cpp BloomFilterAdapter.hpp)
add_library(quickstep_utility_BloomFilter_proto
${quickstep_utility_BloomFilter_proto_srcs}
${quickstep_utility_BloomFilter_proto_hdrs})
@@ -216,6 +217,10 @@ target_link_libraries(quickstep_utility_BloomFilter
quickstep_threading_SpinSharedMutex
quickstep_utility_BloomFilter_proto
quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_BloomFilterAdapter
+ quickstep_catalog_CatalogTypedefs
+ quickstep_utility_BloomFilter
+ quickstep_utility_Macros)
target_link_libraries(quickstep_utility_BloomFilter_proto
${PROTOBUF_LIBRARY})
target_link_libraries(quickstep_utility_CalculateInstalledMemory
@@ -297,6 +302,7 @@ target_link_libraries(quickstep_utility
quickstep_utility_BitManipulation
quickstep_utility_BitVector
quickstep_utility_BloomFilter
+ quickstep_utility_BloomFilterAdapter
quickstep_utility_BloomFilter_proto
quickstep_utility_CalculateInstalledMemory
quickstep_utility_Cast