You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by hb...@apache.org on 2016/09/06 20:16:29 UTC
[52/73] [abbrv] incubator-quickstep git commit: Initial commit for
QUICKSTEP-28 and QUICKSTEP-29. Code refactoring and cleanup,
some more optimizations are pending.
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/169ae326/storage/FastHashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/FastHashTable.hpp b/storage/FastHashTable.hpp
new file mode 100644
index 0000000..12e447f
--- /dev/null
+++ b/storage/FastHashTable.hpp
@@ -0,0 +1,2640 @@
+/**
+ * Copyright 2011-2015 Quickstep Technologies LLC.
+ * Copyright 2015-2016 Pivotal Software, Inc.
+ * Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ * University of Wisconsin\u2014Madison.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_STORAGE_FAST_HASH_TABLE_HPP_
+#define QUICKSTEP_STORAGE_FAST_HASH_TABLE_HPP_
+
+#include <atomic>
+#include <cstddef>
+#include <cstdlib>
+#include <type_traits>
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "storage/HashTableBase.hpp"
+#include "storage/StorageBlob.hpp"
+#include "storage/StorageBlockInfo.hpp"
+#include "storage/StorageConstants.hpp"
+#include "storage/StorageManager.hpp"
+#include "storage/TupleReference.hpp"
+#include "storage/ValueAccessor.hpp"
+#include "storage/ValueAccessorUtil.hpp"
+#include "expressions/aggregation/AggregationHandleAvg.hpp"
+#include "threading/SpinSharedMutex.hpp"
+#include "threading/SpinMutex.hpp"
+#include "types/Type.hpp"
+#include "types/TypedValue.hpp"
+#include "utility/BloomFilter.hpp"
+#include "utility/HashPair.hpp"
+#include "utility/Macros.hpp"
+#include "storage/HashTable.hpp"
+
+namespace quickstep {
+
+/** \addtogroup Storage
+ * @{
+ */
+
+/**
+ * @brief Base class for hash table.
+ *
+ * This class is templated so that the core hash-table logic can be reused in
+ * different contexts requiring different value types and semantics (e.g.
+ * hash-joins vs. hash-based grouping for aggregates vs. hash-based indices).
+ * The base template defines the interface that HashTables provide to clients
+ * and implements some common functionality for all HashTables. There a few
+ * different (also templated) implementation classes that inherit from this
+ * base class and have different physical layouts with different performance
+ * characteristics. As of this writing, they are:
+ * 1. LinearOpenAddressingHashTable - All keys/values are stored directly
+ * in a single array of buckets. Collisions are handled by simply
+ * advancing to the "next" adjacent bucket until an empty bucket is
+ * found. This implementation is vulnerable to performance degradation
+ * due to the formation of bucket chains when there are many duplicate
+ * and/or consecutive keys.
+ * 2. SeparateChainingHashTable - Keys/values are stored in a separate
+ * region of memory from the base hash table slot array. Every bucket
+ * has a "next" pointer so that entries that collide (i.e. map to the
+ * same base slot) form chains of pointers with each other. Although
+ * this implementation has some extra indirection compared to
+ * LinearOpenAddressingHashTable, it does not have the same
+ * vulnerabilities to key skew, and it additionally supports a very
+ * efficient bucket-preallocation mechanism that minimizes cache
+ * coherency overhead when multiple threads are building a HashTable
+ * as part of a hash-join.
+ * 3. SimpleScalarSeparateChainingHashTable - A simplified version of
+ * SeparateChainingHashTable that is only usable for single, scalar
+ * keys with a reversible hash function. This implementation exploits
+ * the reversible hash to avoid storing separate copies of keys at all,
+ * and to skip an extra key comparison when hash codes collide.
+ *
+ * @note If you need to create a HashTable and not just use it as a client, see
+ * HashTableFactory, which simplifies the process of creating a
+ * HashTable.
+ *
+ * @param ValueT The mapped value in this hash table. Must be
+ * copy-constructible. For a serializable hash table, ValueT must also
+ * be trivially copyable and trivially destructible (and beware of
+ * pointers to external memory).
+ * @param resizable Whether this hash table is resizable (using memory from a
+ * StorageManager) or not (using a private, fixed memory allocation).
+ * @param serializable If true, this hash table can safely be saved to and
+ * loaded from disk. If false, some out of band memory may be used (e.g.
+ * to store variable length keys).
+ * @param force_key_copy If true, inserted keys are always copied into this
+ * HashTable's memory. If false, pointers to external values may be
+ * stored instead. force_key_copy should be true if the hash table will
+ * outlive the external key values which are inserted into it. Note that
+ * if serializable is true and force_key_copy is false, then relative
+ * offsets will be used instead of absolute pointers to keys, meaning
+ * that the pointed-to keys must be serialized and deserialized in
+ * exactly the same relative byte order (e.g. as part of the same
+ * StorageBlock), and keys must not change position relative to this
+ * HashTable (beware TupleStorageSubBlocks that may self-reorganize when
+ * modified). If serializable and resizable are both true, then
+ * force_key_copy must also be true.
+ * @param allow_duplicate_keys If true, multiple values can be mapped to the
+ * same key. If false, one and only one value may be mapped.
+ **/
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+class FastHashTable : public HashTableBase<resizable,
+ serializable,
+ force_key_copy,
+ allow_duplicate_keys> {
+ static_assert(!(serializable && resizable && !force_key_copy),
+ "A HashTable must have force_key_copy=true when serializable "
+ "and resizable are both true.");
+
+ // TODO(chasseur): GCC 4.8.3 doesn't yet implement
+ // std::is_trivially_copyable. In the future, we should include a
+ // static_assert that prevents a serializable HashTable from being used with
+ // a ValueT which is not trivially copyable.
+
+ public:
+ // Shadow template parameters. This is useful for shared test harnesses.
+// typedef ValueT value_type;
+ static constexpr bool template_resizable = resizable;
+ static constexpr bool template_serializable = serializable;
+ static constexpr bool template_force_key_copy = force_key_copy;
+ static constexpr bool template_allow_duplicate_keys = allow_duplicate_keys;
+
+ // Some HashTable implementations (notably LinearOpenAddressingHashTable)
+ // use a special hash code to represent an empty bucket, and another special
+ // code to indicate that a bucket is currently being inserted into. For those
+ // HashTables, this is a surrogate hash value for empty buckets. Keys which
+ // actually hash to this value should have their hashes mutated (e.g. by
+ // adding 1). We use zero, since we will often be using memory which is
+ // already zeroed-out and this saves us the trouble of a memset. This has
+ // some downside, as the hash function we use is the identity hash for
+ // integers, and the integer 0 is common in many data sets and must be
+ // adjusted (and will then spuriously collide with 1). Nevertheless, this
+ // expense is outweighed by no longer having to memset large regions of
+ // memory when initializing a HashTable.
+ static constexpr unsigned char kEmptyHashByte = 0x0;
+ static constexpr std::size_t kEmptyHash = 0x0;
+
+ // A surrogate hash value for a bucket which is currently being inserted
+ // into. As with kEmptyHash, keys which actually hash to this value should
+ // have their hashes adjusted.
+ static constexpr std::size_t kPendingHash = ~kEmptyHash;
+
+ /**
+ * @brief Virtual destructor.
+ **/
+ virtual ~FastHashTable() {
+ if (resizable) {
+ if (blob_.valid()) {
+ if (serializable) {
+ DEV_WARNING("Destroying a resizable serializable HashTable's underlying "
+ "StorageBlob.");
+ }
+ const block_id blob_id = blob_->getID();
+ blob_.release();
+ storage_manager_->deleteBlockOrBlobFile(blob_id);
+ }
+ }
+ }
+
+ /**
+ * @brief Get the ID of the StorageBlob used to store a resizable HashTable.
+ *
+ * @warning This method must not be used for a non-resizable HashTable.
+ *
+ * @return The ID of the StorageBlob used to store this HashTable.
+ **/
+ inline block_id getBlobId() const {
+ DEBUG_ASSERT(resizable);
+ return blob_->getID();
+ }
+
+ /**
+ * @brief Erase all entries in this hash table.
+ *
+ * @warning This method is not guaranteed to be threadsafe.
+ **/
+ virtual void clear() = 0;
+
+ /**
+ * @brief Add a new entry into the hash table.
+ *
+ * @warning The key must not be null.
+ * @warning This method is threadsafe with regard to other calls to put(),
+ * putCompositeKey(), putValueAccessor(), and
+ * putValueAccessorCompositeKey(), but should not be used
+ * simultaneously with upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey().
+ * @note This version is for single scalar keys, see also putCompositeKey().
+ * @note If the hash table is (close to) full and resizable is true, this
+ * routine might result in rebuilding the entire hash table.
+ *
+ * @param key The key.
+ * @param value The value payload.
+ * @return HashTablePutResult::kOK if an entry was successfully inserted,
+ * HashTablePutResult::kDuplicateKey if allow_duplicate_keys is false
+ * and key was a duplicate, or HashTablePutResult::kOutOfSpace if
+ * resizable is false and storage space for the hash table has been
+ * exhausted.
+ **/
+ HashTablePutResult put(const TypedValue &key,
+ const uint8_t &value);
+
+ /**
+ * @brief Add a new entry into the hash table (composite key version).
+ *
+ * @warning No component of the key may be null.
+ * @warning This method is threadsafe with regard to other calls to put(),
+ * putCompositeKey(), putValueAccessor(), and
+ * putValueAccessorCompositeKey(), but should not be used
+ * simultaneously with upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey().
+ * @note This version is for composite keys, see also put().
+ * @note If the hash table is (close to) full and resizable is true, this
+ * routine might result in rebuilding the entire hash table.
+ *
+ * @param key The components of the key.
+ * @param value The value payload.
+ * @return HashTablePutResult::kOK if an entry was successfully inserted,
+ * HashTablePutResult::kDuplicateKey if allow_duplicate_keys is false
+ * and key was a duplicate, or HashTablePutResult::kOutOfSpace if
+ * resizable is false and storage space for the hash table has been
+ * exhausted.
+ **/
+ HashTablePutResult putCompositeKey(const std::vector<TypedValue> &key,
+ const uint8_t &value);
+
+ HashTablePutResult putCompositeKeyFast(const std::vector<TypedValue> &key,
+ const uint8_t *value_ptr);
+
+ /**
+ * @brief Add (multiple) new entries into the hash table from a
+ * ValueAccessor.
+ *
+ * @warning This method is threadsafe with regard to other calls to put(),
+ * putCompositeKey(), putValueAccessor(), and
+ * putValueAccessorCompositeKey(), but should not be used
+ * simultaneously with upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey().
+ * @note This version is for scalar keys, see also
+ * putValueAccessorCompositeKey().
+ * @note If the hash table fills up while this call is in progress and
+ * resizable is true, this might result in rebuilding the entire hash
+ * table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before inserting it (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator that takes const ValueAccessor& as an argument (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as an argument) and returns either
+ * a ValueT or a reference to a ValueT. The functor should generate
+ * the appropriate mapped value for the current tuple the accessor is
+ * iterating on.
+ * @return HashTablePutResult::kOK if all keys and generated values from
+ * accessor were successfully inserted.
+ * HashTablePutResult::kOutOfSpace is returned if this hash-table is
+ * non-resizable and ran out of space (note that some entries may
+ * still have been inserted, and accessor's iteration will be left on
+ * the first tuple which could not be inserted).
+ * HashTablePutResult::kDuplicateKey is returned if
+ * allow_duplicate_keys is false and a duplicate key is encountered
+ * (as with HashTablePutResult::kOutOfSpace, some entries may have
+ * been inserted, and accessor will be left on the tuple with a
+ * duplicate key).
+ **/
+ template <typename FunctorT>
+ HashTablePutResult putValueAccessor(ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ FunctorT *functor);
+
+ /**
+ * @brief Add (multiple) new entries into the hash table from a
+ * ValueAccessor (composite key version).
+ *
+ * @warning This method is threadsafe with regard to other calls to put(),
+ * putCompositeKey(), putValueAccessor(), and
+ * putValueAccessorCompositeKey(), but should not be used
+ * simultaneously with upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey().
+ * @note This version is for composite keys, see also putValueAccessor().
+ * @note If the hash table fills up while this call is in progress and
+ * resizable is true, this might result in rebuilding the entire hash
+ * table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_ids The attribute IDs of each key component to be read
+ * from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * has a null component before inserting it (null keys are skipped).
+ * This must be set to true if some of the keys that will be read from
+ * accessor may be null.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator that takes const ValueAccessor& as an argument (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as an argument) and returns either
+ * a ValueT or a reference to a ValueT. The functor should generate
+ * the appropriate mapped value for the current tuple the accessor is
+ * iterating on.
+ * @return HashTablePutResult::kOK if all keys and generated values from
+ * accessor were successfully inserted.
+ * HashTablePutResult::kOutOfSpace is returned if this hash-table is
+ * non-resizable and ran out of space (note that some entries may
+ * still have been inserted, and accessor's iteration will be left on
+ * the first tuple which could not be inserted).
+ * HashTablePutResult::kDuplicateKey is returned if
+ * allow_duplicate_keys is false and a duplicate key is encountered
+ * (as with HashTablePutResult::kOutOfSpace, some entries may have
+ * been inserted, and accessor will be left on the tuple with a
+ * duplicate key).
+ **/
+ template <typename FunctorT>
+ HashTablePutResult putValueAccessorCompositeKey(
+ ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ FunctorT *functor);
+
+ /**
+ * @brief Apply a functor to the value mapped to a key, first inserting a new
+ * value if one is not already present.
+ *
+ * @warning The key must not be null.
+ * @warning This method is only usable if allow_duplicate_keys is false.
+ * @warning This method is threadsafe with regard to other calls to upsert(),
+ * upsertCompositeKey(), upsertValueAccessor(), and
+ * upsertValueAccessorCompositeKey(), but should not be used
+ * simultaneously with put(), putCompositeKey(), putValueAccessor(),
+ * or putValueAccessorCompositeKey().
+ * @warning The ValueT* pointer passed to functor's call operator is only
+ * guaranteed to be valid for the duration of the call. The functor
+ * should not store a copy of the pointer and assume that it remains
+ * valid.
+ * @warning Although this method itself is threadsafe, the ValueT object
+ * accessed by functor is not guaranteed to be (although it is
+ * guaranteed that its initial insertion will be atomic). If it is
+ * possible for multiple threads to call upsert() with the same key
+ * at the same time, then their access to ValueT should be made
+ * threadsafe (e.g. with the use of atomic types, mutexes, or some
+ * other external synchronization).
+ * @note This version is for single scalar keys, see also
+ * upsertCompositeKey().
+ * @note If the hash table is (close to) full and resizable is true, this
+ * routine might result in rebuilding the entire hash table.
+ *
+ * @param key The key.
+ * @param initial_value If there was not already a preexisting entry in this
+ * HashTable for the specified key, then the value will be initialized
+ * with a copy of initial_value. This parameter is ignored if a value
+ * is already present for key.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator which takes ValueT* as an argument. The call operator will
+ * be invoked once on the value corresponding to key (which may be
+ * newly inserted and default-constructed).
+ * @return True on success, false if upsert failed because there was not
+ * enough space to insert a new entry in this HashTable.
+ **/
+ template <typename FunctorT>
+ bool upsert(const TypedValue &key,
+ const uint8_t &initial_value,
+ FunctorT *functor);
+
+ /**
+ * @brief Apply a functor to the value mapped to a key, first inserting a new
+ * value if one is not already present.
+ *
+ * @warning The key must not be null.
+ * @warning This method is only usable if allow_duplicate_keys is false.
+ * @warning This method is threadsafe with regard to other calls to upsert(),
+ * upsertCompositeKey(), upsertValueAccessor(), and
+ * upsertValueAccessorCompositeKey(), but should not be used
+ * simultaneously with put(), putCompositeKey(), putValueAccessor(),
+ * or putValueAccessorCompositeKey().
+ * @warning The ValueT* pointer passed to functor's call operator is only
+ * guaranteed to be valid for the duration of the call. The functor
+ * should not store a copy of the pointer and assume that it remains
+ * valid.
+ * @warning Although this method itself is threadsafe, the ValueT object
+ * accessed by functor is not guaranteed to be (although it is
+ * guaranteed that its initial insertion will be atomic). If it is
+ * possible for multiple threads to call upsertCompositeKey() with
+ * the same key at the same time, then their access to ValueT should
+ * be made threadsafe (e.g. with the use of atomic types, mutexes,
+ * or some other external synchronization).
+ * @note This version is for composite keys, see also upsert().
+ * @note If the hash table is (close to) full and resizable is true, this
+ * routine might result in rebuilding the entire hash table.
+ *
+ * @param key The key.
+ * @param initial_value If there was not already a preexisting entry in this
+ * HashTable for the specified key, then the value will be initialized
+ * with a copy of initial_value. This parameter is ignored if a value
+ * is already present for key.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator which takes ValueT* as an argument. The call operator will
+ * be invoked once on the value corresponding to key (which may be
+ * newly inserted and default-constructed).
+ * @return True on success, false if upsert failed because there was not
+ * enough space to insert a new entry in this HashTable.
+ **/
+ template <typename FunctorT>
+ bool upsertCompositeKey(const std::vector<TypedValue> &key,
+ const uint8_t &initial_value,
+ FunctorT *functor);
+
+
+ template <typename FunctorT>
+ bool upsertCompositeKeyFast(const std::vector<TypedValue> &key,
+ const uint8_t *init_value_ptr,
+ FunctorT *functor);
+
+ bool upsertCompositeKeyNewFast(const std::vector<TypedValue> &key,
+ const uint8_t *init_value_ptr,
+ const uint8_t *source_state);
+
+ /**
+ * @brief Apply a functor to (multiple) entries in this hash table, with keys
+ * drawn from a ValueAccessor. New values are first inserted if not
+ * already present.
+ *
+ * @warning This method is only usable if allow_duplicate_keys is false.
+ * @warning This method is threadsafe with regard to other calls to upsert(),
+ * upsertCompositeKey(), upsertValueAccessor(), and
+ * upsertValueAccessorCompositeKey(), but should not be used
+ * simultaneously with put(), putCompositeKey(), putValueAccessor(),
+ * or putValueAccessorCompositeKey().
+ * @warning The ValueAccessor reference and ValueT* pointer passed to
+ * functor's call operator are only guaranteed to be valid for the
+ * duration of the call. The functor should not store a copy of
+ * these pointers and assume that they remain valid.
+ * @warning Although this method itself is threadsafe, the ValueT object
+ * accessed by functor is not guaranteed to be (although it is
+ * guaranteed that its initial insertion will be atomic). If it is
+ * possible for multiple threads to call upsertValueAccessor() with
+ * the same key at the same time, then their access to ValueT should
+ * be made threadsafe (e.g. with the use of atomic types, mutexes,
+ * or some other external synchronization).
+ * @note This version is for single scalar keys, see also
+ * upsertValueAccessorCompositeKey().
+ * @note If the hash table is (close to) full and resizable is true, this
+ * routine might result in rebuilding the entire hash table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before upserting it (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator that takes two arguments: const ValueAccessor& (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as its first argument) and ValueT*.
+ * The call operator will be invoked once for every tuple with a
+ * non-null key in accessor.
+ * @return True on success, false if upsert failed because there was not
+ * enough space to insert new entries for all the keys in accessor
+ * (note that some entries may still have been upserted, and
+ * accessor's iteration will be left on the first tuple which could
+ * not be inserted).
+ **/
+ template <typename FunctorT>
+ bool upsertValueAccessor(ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ const uint8_t &initial_value,
+ FunctorT *functor);
+
+
+ bool upsertValueAccessorFast(const std::vector<std::vector<attribute_id>> &argument_ids,
+ ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys);
+
+ /**
+ * @brief Apply a functor to (multiple) entries in this hash table, with keys
+ * drawn from a ValueAccessor. New values are first inserted if not
+ * already present. Composite key version.
+ *
+ * @warning This method is only usable if allow_duplicate_keys is false.
+ * @warning This method is threadsafe with regard to other calls to upsert(),
+ * upsertCompositeKey(), upsertValueAccessor(), and
+ * upsertValueAccessorCompositeKey(), but should not be used
+ * simultaneously with put(), putCompositeKey(), putValueAccessor(),
+ * or putValueAccessorCompositeKey().
+ * @warning The ValueAccessor reference and ValueT* pointer passed to
+ * functor's call operator are only guaranteed to be valid for the
+ * duration of the call. The functor should not store a copy of
+ * these pointers and assume that they remain valid.
+ * @warning Although this method itself is threadsafe, the ValueT object
+ * accessed by functor is not guaranteed to be (although it is
+ * guaranteed that its initial insertion will be atomic). If it is
+ * possible for multiple threads to call upsertValueAccessor() with
+ * the same key at the same time, then their access to ValueT should
+ * be made threadsafe (e.g. with the use of atomic types, mutexes,
+ * or some other external synchronization).
+ * @note This version is for composite keys, see also upsertValueAccessor().
+ * @note If the hash table is (close to) full and resizable is true, this
+ * routine might result in rebuilding the entire hash table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_ids The attribute IDs of each key component to be read
+ * from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before upserting it (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator that takes two arguments: const ValueAccessor& (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as its first argument) and ValueT*.
+ * The call operator will be invoked once for every tuple with a
+ * non-null key in accessor.
+ * @return True on success, false if upsert failed because there was not
+ * enough space to insert new entries for all the keys in accessor
+ * (note that some entries may still have been upserted, and
+ * accessor's iteration will be left on the first tuple which could
+ * not be inserted).
+ **/
+ template <typename FunctorT>
+ bool upsertValueAccessorCompositeKey(
+ ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ const uint8_t &initial_value,
+ FunctorT *functor);
+
+ bool upsertValueAccessorCompositeKeyFast(
+ const std::vector<std::vector<attribute_id>> &argument,
+ ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys);
+
+ /**
+ * @brief Determine the number of entries (key-value pairs) contained in this
+ * HashTable.
+ * @note For some HashTable implementations, this is O(1), but for others it
+ * may be O(n) where n is the number of buckets.
+ *
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ *
+ * @return The number of entries in this HashTable.
+ **/
+ virtual std::size_t numEntries() const = 0;
+
+ /**
+ * @brief Lookup a key against this hash table to find a matching entry.
+ *
+ * @warning Only usable with the hash table that does not allow duplicate
+ * keys.
+ * @warning The key must not be null.
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note This version is for single scalar keys. See also
+ * getSingleCompositeKey().
+ *
+ * @param key The key to look up.
+ * @return The value of a matched entry if a matching key is found.
+ * Otherwise, return NULL.
+ **/
+ virtual const uint8_t* getSingle(const TypedValue &key) const = 0;
+
+ /**
+ * @brief Lookup a composite key against this hash table to find a matching
+ * entry.
+ *
+ * @warning Only usable with the hash table that does not allow duplicate
+ * keys.
+ * @warning The key must not be null.
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note This version is for composite keys. See also getSingle().
+ *
+ * @param key The key to look up.
+ * @return The value of a matched entry if a matching key is found.
+ * Otherwise, return NULL.
+ **/
+ virtual const uint8_t* getSingleCompositeKey(const std::vector<TypedValue> &key) const = 0;
+ virtual const uint8_t* getSingleCompositeKey(const std::vector<TypedValue> &key, int index) const = 0;
+
+ /**
+ * @brief Lookup a key against this hash table to find matching entries.
+ *
+ * @warning The key must not be null.
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note It is more efficient to call getSingle() if the hash table does not
+ * allow duplicate keys.
+ * @note This version is for single scalar keys. See also
+ * getAllCompositeKey().
+ *
+ * @param key The key to look up.
+ * @param values A vector to hold values of all matching entries. Matches
+ * will be appended to the vector.
+ **/
+ virtual void getAll(const TypedValue &key, std::vector<const uint8_t*> *values) const = 0;
+
+ /**
+ * @brief Lookup a composite key against this hash table to find matching
+ * entries.
+ *
+ * @warning The key must not be null.
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note It is more efficient to call getSingleCompositeKey() if the hash
+ * table does not allow duplicate keys.
+ * @note This version is for composite keys. See also getAll().
+ *
+ * @param key The key to look up.
+ * @param values A vector to hold values of all matching entries. Matches
+ * will be appended to the vector.
+ **/
+ virtual void getAllCompositeKey(const std::vector<TypedValue> &key,
+ std::vector<const uint8_t*> *values) const = 0;
+
+ /**
+ * @brief Lookup (multiple) keys from a ValueAccessor and apply a functor to
+ * the matching values.
+ *
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note This version is for single scalar keys. See also
+ * getAllFromValueAccessorCompositeKey().
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before looking it up (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator that takes 2 arguments: const ValueAccessor& (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as its first argument) and
+ * const ValueT&. The functor will be invoked once for each pair of a
+ * key taken from accessor and matching value.
+ **/
+ template <typename FunctorT>
+ void getAllFromValueAccessor(ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ FunctorT *functor) const;
+
+ /**
+ * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to the
+ * matching values and additionally call a recordMatch() function of
+ * the functor when the first match for a key is found.
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note This version is for single scalar keys. See also
+ * getAllFromValueAccessorCompositeKeyWithExtraWorkForFirstMatch().
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before looking it up (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor, which should provide two functions:
+ * 1) An operator that takes 2 arguments: const ValueAccessor& (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as its first argument) and
+ * const ValueT&. The operator will be invoked once for each pair of a
+ * key taken from accessor and matching value.
+ * 2) A function hasMatch that takes 1 argument: const ValueAccessor&.
+ * The function will be called only once for a key from accessor when
+ * the first match is found.
+ */
+ template <typename FunctorT>
+ void getAllFromValueAccessorWithExtraWorkForFirstMatch(
+ ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ FunctorT *functor) const;
+
+ /**
+ * @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to the
+ * matching values and additionally call a recordMatch() function of
+ * the functor when the first match for a key is found. Composite key
+ * version.
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before looking it up (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor, which should provide two functions:
+ * 1) An operator that takes 2 arguments: const ValueAccessor& (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as its first argument) and
+ * const ValueT&. The operator will be invoked once for each pair of a
+ * key taken from accessor and matching value.
+ * 2) A function hasMatch that takes 1 argument: const ValueAccessor&.
+ * The function will be called only once for a key from accessor when
+ * the first match is found.
+ */
+ template <typename FunctorT>
+ void getAllFromValueAccessorCompositeKeyWithExtraWorkForFirstMatch(
+ ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ FunctorT *functor) const;
+
+ /**
+ * @brief Lookup (multiple) keys from a ValueAccessor and apply a functor to
+ * the matching values. Composite key version.
+ *
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note This version is for composite keys. See also
+ * getAllFromValueAccessor().
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_ids The attribute IDs of each key component to be read
+ * from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * has a null component before inserting it (null keys are skipped).
+ * This must be set to true if some of the keys that will be read from
+ * accessor may be null.
+ * @param functor A pointer to a functor, which should provide a call
+ * operator that takes 2 arguments: const ValueAccessor& (or better
+ * yet, a templated call operator which takes a const reference to
+ * some subclass of ValueAccessor as its first argument) and
+ * const ValueT&. The functor will be invoked once for each pair of a
+ * key taken from accessor and matching value.
+ **/
+ template <typename FunctorT>
+ void getAllFromValueAccessorCompositeKey(ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ FunctorT *functor) const;
+
+ /**
+ * @brief Apply the functor to each key with a match in the hash table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before looking it up (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor which should provide an operator that
+ * takes 1 argument: const ValueAccessor&. The operator will be called
+ * only once for a key from accessor if there is a match.
+ */
+ template <typename FunctorT>
+ void runOverKeysFromValueAccessorIfMatchFound(ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ FunctorT *functor) const {
+ return runOverKeysFromValueAccessor<true>(accessor,
+ key_attr_id,
+ check_for_null_keys,
+ functor);
+ }
+
+ /**
+ * @brief Apply the functor to each key with a match in the hash table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before looking it up (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor which should provide an operator that
+ * takes 1 argument: const ValueAccessor&. The operator will be called
+ * only once for a key from accessor if there is a match.
+ */
+ template <typename FunctorT>
+ void runOverKeysFromValueAccessorIfMatchFoundCompositeKey(
+ ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ FunctorT *functor) const {
+ return runOverKeysFromValueAccessorCompositeKey<true>(accessor,
+ key_attr_ids,
+ check_for_null_keys,
+ functor);
+ }
+
+ /**
+ * @brief Apply the functor to each key without a match in the hash table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before looking it up (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor which should provide an operator that
+ * takes 1 argument: const ValueAccessor&. The operator will be called
+ * only once for a key from accessor if there is no match.
+ */
+ template <typename FunctorT>
+ void runOverKeysFromValueAccessorIfMatchNotFound(
+ ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ FunctorT *functor) const {
+ return runOverKeysFromValueAccessor<false>(accessor,
+ key_attr_id,
+ check_for_null_keys,
+ functor);
+ }
+
+ /**
+ * @brief Apply the functor to each key without a match in the hash table.
+ *
+ * @param accessor A ValueAccessor which will be used to access keys.
+ * beginIteration() should be called on accessor before calling this
+ * method.
+ * @param key_attr_id The attribute ID of the keys to be read from accessor.
+ * @param check_for_null_keys If true, each key will be checked to see if it
+ * is null before looking it up (null keys are skipped). This must be
+ * set to true if some of the keys that will be read from accessor may
+ * be null.
+ * @param functor A pointer to a functor which should provide an operator that
+ * takes 1 argument: const ValueAccessor&. The operator will be called
+ * only once for a key from accessor if there is no match.
+ */
+ template <typename FunctorT>
+ void runOverKeysFromValueAccessorIfMatchNotFoundCompositeKey(
+ ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ FunctorT *functor) const {
+ return runOverKeysFromValueAccessorCompositeKey<false>(accessor,
+ key_attr_ids,
+ check_for_null_keys,
+ functor);
+ }
+
+ /**
+ * @brief Apply a functor to each key, value pair in this hash table.
+ *
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note This version is for single scalar keys. See also
+ * forEachCompositeKey().
+ *
+ * @param functor A pointer to a functor, which should provide a call
+ * operator which takes 2 arguments: const TypedValue&, const ValueT&.
+ * The call operator will be invoked once on each key, value pair in
+ * this hash table (note that if allow_duplicate_keys is true,
+ * the call may occur multiple times for the same key with different
+ * values).
+ * @return The number of key-value pairs visited.
+ **/
+ template <typename FunctorT>
+ std::size_t forEach(FunctorT *functor) const;
+
+ /**
+ * @brief Apply a functor to each key, value pair in this hash table.
+ *
+ * @warning This method assumes that no concurrent calls to put(),
+ * putCompositeKey(), putValueAccessor(),
+ * putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
+ * upsertValueAccessor(), or upsertValueAccessorCompositeKey() are
+ * taking place (i.e. that this HashTable is immutable for the
+ * duration of the call and as long as the returned pointer may be
+ * dereferenced). Concurrent calls to getSingle(),
+ * getSingleCompositeKey(), getAll(), getAllCompositeKey(),
+ * getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
+ * forEach(), and forEachCompositeKey() are safe.
+ * @note This version is for composite keys. See also forEach().
+ *
+ * @param functor A pointer to a functor, which should provide a call
+ * operator which takes 2 arguments: const std::vector<TypedValue>&,
+ * const ValueT&. The call operator will be invoked once on each key,
+ * value pair in this hash table (note that if allow_duplicate_keys is
+ * true, the call may occur multiple times for the same key with
+ * different values).
+ * @return The number of key-value pairs visited.
+ **/
+ template <typename FunctorT>
+ std::size_t forEachCompositeKey(FunctorT *functor) const;
+
+ template <typename FunctorT>
+ std::size_t forEachCompositeKeyFast(FunctorT *functor) const;
+
+ template <typename FunctorT>
+ std::size_t forEachCompositeKeyFast(FunctorT *functor, int index) const;
+ /**
+ * @brief A call to this function will cause a bloom filter to be built
+ * during the build phase of this hash table.
+ **/
+ inline void enableBuildSideBloomFilter() {
+ has_build_side_bloom_filter_ = true;
+ }
+
+ /**
+ * @brief A call to this function will cause a set of bloom filters to be
+ * probed during the probe phase of this hash table.
+ **/
+ inline void enableProbeSideBloomFilter() {
+ has_probe_side_bloom_filter_ = true;
+ }
+
+ /**
+ * @brief This function sets the pointer to the bloom filter to be
+ * used during the build phase of this hash table.
+ * @warning Should call enable_build_side_bloom_filter() first to enable
+ * bloom filter usage during build phase.
+ * @note The ownership of the bloom filter lies with the caller.
+ *
+ * @param bloom_filter The pointer to the bloom filter.
+ **/
+ inline void setBuildSideBloomFilter(BloomFilter *bloom_filter) {
+ build_bloom_filter_ = bloom_filter;
+ }
+
+ /**
+ * @brief This function adds a pointer to the list of bloom filters to be
+ * used during the probe phase of this hash table.
+ * @warning Should call enable_probe_side_bloom_filter() first to enable
+ * bloom filter usage during probe phase.
+ * @note The ownership of the bloom filter lies with the caller.
+ *
+ * @param bloom_filter The pointer to the bloom filter.
+ **/
+ inline void addProbeSideBloomFilter(const BloomFilter *bloom_filter) {
+ probe_bloom_filters_.emplace_back(bloom_filter);
+ }
+
+ /**
+ * @brief This function adds a vector of attribute ids corresponding to a
+ * bloom filter used during the probe phase of this hash table.
+ * @warning Should call enable_probe_side_bloom_filter() first to enable
+ * bloom filter usage during probe phase.
+ *
+ * @param probe_attribute_ids The vector of attribute ids to use for probing
+ * the bloom filter.
+ **/
+ inline void addProbeSideAttributeIds(std::vector<attribute_id> &&probe_attribute_ids) {
+ probe_attribute_ids_.push_back(probe_attribute_ids);
+ }
+
+ protected:
+ /**
+ * @brief Constructor for new resizable hash table.
+ *
+ * @param key_types A vector of one or more types (>1 indicates a composite
+ * key).
+ * @param num_entries The estimated number of entries this hash table will
+ * hold.
+ * @param storage_manager The StorageManager to use (a StorageBlob will be
+ * allocated to hold this hash table's contents).
+ * @param adjust_hashes If true, the hash of a key should be modified by
+ * applying AdjustHash() so that it does not collide with one of the
+ * special values kEmptyHash or kPendingHash. If false, the hash is
+ * used as-is.
+ * @param use_scalar_literal_hash If true, the key is a single scalar literal
+ * (non-composite) that it is safe to use the simplified hash function
+ * TypedValue::getHashScalarLiteral() on. If false, the generic
+ * TypedValue::getHash() method will be used.
+ * @param preallocate_supported If true, this HashTable overrides
+ * preallocateForBulkInsert() to allow bulk-allocation of resources
+ * (i.e. buckets and variable-length key storage) in a single up-front
+ * pass when bulk-inserting entries. If false, resources are allocated
+ * on the fly for each entry.
+ **/
+ FastHashTable(const std::vector<const Type*> &key_types,
+ const std::size_t num_entries,
+ const std::vector<AggregationHandle *> &handles,
+ const std::vector<std::size_t> &payload_sizes,
+ StorageManager *storage_manager,
+ const bool adjust_hashes,
+ const bool use_scalar_literal_hash,
+ const bool preallocate_supported)
+ : key_types_(key_types),
+ scalar_key_inline_(true),
+ key_inline_(nullptr),
+ adjust_hashes_(adjust_hashes),
+ use_scalar_literal_hash_(use_scalar_literal_hash),
+ preallocate_supported_(preallocate_supported),
+ handles_(handles),
+ total_payload_size_(std::accumulate(payload_sizes.begin(), payload_sizes.end(), sizeof(SpinMutex))),
+ storage_manager_(storage_manager),
+ hash_table_memory_(nullptr),
+ hash_table_memory_size_(0) {
+ DEBUG_ASSERT(resizable);
+ std::size_t running_sum = sizeof(SpinMutex);
+ for (auto size : payload_sizes) {
+ payload_offsets_.emplace_back(running_sum);
+ running_sum+=size;
+ }
+ }
+
+ /**
+ * @brief Constructor for non-resizable hash table.
+ *
+ * @param key_types A vector of one or more types (>1 indicates a composite
+ * key).
+ * @param hash_table_memory A pointer to memory to use for this hash table.
+ * @param hash_table_memory_size The size of hash_table_memory in bytes.
+ * @param new_hash_table If true, this hash table is being constructed for
+ * the first time and hash_table_memory will be cleared. If false,
+ * reload a pre-existing hash table.
+ * @param hash_table_memory_zeroed If new_hash_table is true, setting this to
+ * true means that this HashTable will assume that hash_table_memory
+ * has already been zeroed-out (any newly-allocated block or blob
+ * memory from StorageManager is zeroed-out). If false, this HashTable
+ * will explicitly zero-fill its memory as neccessary. This parameter
+ * has no effect when new_hash_table is false.
+ * @param adjust_hashes If true, the hash of a key should be modified by
+ * applying AdjustHash() so that it does not collide with one of the
+ * special values kEmptyHash or kPendingHash. If false, the hash is
+ * used as-is.
+ * @param use_scalar_literal_hash If true, the key is a single scalar literal
+ * (non-composite) that it is safe to use the simplified hash function
+ * TypedValue::getHashScalarLiteral() on. If false, the generic
+ * TypedValue::getHash() method will be used.
+ * @param preallocate_supported If true, this HashTable overrides
+ * preallocateForBulkInsert() to allow bulk-allocation of resources
+ * (i.e. buckets and variable-length key storage) in a single up-front
+ * pass when bulk-inserting entries. If false, resources are allocated
+ * on the fly for each entry.
+ **/
+ FastHashTable(const std::vector<const Type*> &key_types,
+ void *hash_table_memory,
+ const std::size_t hash_table_memory_size,
+ const bool new_hash_table,
+ const bool hash_table_memory_zeroed,
+ const bool adjust_hashes,
+ const bool use_scalar_literal_hash,
+ const bool preallocate_supported)
+ : key_types_(key_types),
+ scalar_key_inline_(true),
+ key_inline_(nullptr),
+ adjust_hashes_(adjust_hashes),
+ use_scalar_literal_hash_(use_scalar_literal_hash),
+ preallocate_supported_(preallocate_supported),
+ storage_manager_(nullptr),
+ hash_table_memory_(hash_table_memory),
+ hash_table_memory_size_(hash_table_memory_size) {
+ DEBUG_ASSERT(!resizable);
+ }
+
+ // Adjust 'hash' so that it is not exactly equal to either of the special
+ // values kEmptyHash or kPendingHash.
+ inline constexpr static std::size_t AdjustHash(const std::size_t hash) {
+ return hash + (hash == kEmptyHash) - (hash == kPendingHash);
+ }
+
+ // Set information about which key components are stored inline. This usually
+ // comes from a HashTableKeyManager, and is set by the constructor of a
+ // subclass of HashTable.
+ inline void setKeyInline(const std::vector<bool> *key_inline) {
+ scalar_key_inline_ = key_inline->front();
+ key_inline_ = key_inline;
+ }
+
+ // Generate a hash for a composite key by hashing each component of 'key' and
+ // mixing their bits with CombineHashes().
+ inline std::size_t hashCompositeKey(const std::vector<TypedValue> &key) const;
+
+ // If 'force_key_copy' is true and some part of a composite key is
+ // variable-length, calculate the total number of bytes for variable-length
+ // key components that need to be copied. Otherwise, return 0 to indicate
+ // that no variable-length copy is required.
+ inline std::size_t calculateVariableLengthCompositeKeyCopySize(
+ const std::vector<TypedValue> &key) const;
+
+ // Helpers for put. If this HashTable is resizable, 'resize_shared_mutex_'
+ // should be locked in shared mode before calling either of these methods.
+ virtual HashTablePutResult putInternal(const TypedValue &key,
+ const std::size_t variable_key_size,
+ const uint8_t &value,
+ HashTablePreallocationState *prealloc_state) = 0;
+ virtual HashTablePutResult putCompositeKeyInternal(const std::vector<TypedValue> &key,
+ const std::size_t variable_key_size,
+ const uint8_t &value,
+ HashTablePreallocationState *prealloc_state) = 0;
+
+ virtual HashTablePutResult putCompositeKeyInternalFast(const std::vector<TypedValue> &key,
+ const std::size_t variable_key_size,
+ const std::uint8_t *init_value_ptr,
+ HashTablePreallocationState *prealloc_state) = 0;
+
+
+ // Helpers for upsert. Both return a pointer to the value corresponding to
+ // 'key'. If this HashTable is resizable, 'resize_shared_mutex_' should be
+ // locked in shared mode while calling and using the returned pointer. May
+ // return NULL if there is not enough space to insert a new key, in which
+ // case a resizable HashTable should release the 'resize_shared_mutex_' and
+ // call resize(), then try again.
+ virtual uint8_t* upsertInternal(const TypedValue &key,
+ const std::size_t variable_key_size,
+ const uint8_t &initial_value) = 0;
+ virtual uint8_t* upsertInternalFast(const TypedValue &key,
+ const std::uint8_t *init_value_ptr,
+ const std::size_t variable_key_size) = 0;
+ virtual uint8_t* upsertCompositeKeyInternal(const std::vector<TypedValue> &key,
+ const std::size_t variable_key_size,
+ const uint8_t &initial_value) = 0;
+
+ virtual uint8_t* upsertCompositeKeyInternalFast(const std::vector<TypedValue> &key,
+ const std::uint8_t *init_value_ptr,
+ const std::size_t variable_key_size) = 0;
+
+ // Helpers for forEach. Each return true on success, false if no more entries
+ // exist to iterate over. After a successful call, '*key' is overwritten with
+ // the key of the next entry, '*value' points to the associated value, and
+ // '*entry_num' is incremented to the next (implementation defined) entry to
+ // check ('*entry_num' should initially be set to zero).
+ virtual bool getNextEntry(TypedValue *key,
+ const uint8_t **value,
+ std::size_t *entry_num) const = 0;
+ virtual bool getNextEntryCompositeKey(std::vector<TypedValue> *key,
+ const uint8_t **value,
+ std::size_t *entry_num) const = 0;
+
+ // Helpers for getAllFromValueAccessor. Each return true on success, false if
+ // no more entries exist for the specified key. After a successful call,
+ // '*value' points to the associated value, and '*entry_num' is incremented
+ // to the next (implementation defined) entry to check ('*entry_num' should
+ // initially be set to zero).
+ virtual bool getNextEntryForKey(const TypedValue &key,
+ const std::size_t hash_code,
+ const uint8_t **value,
+ std::size_t *entry_num) const = 0;
+ virtual bool getNextEntryForCompositeKey(const std::vector<TypedValue> &key,
+ const std::size_t hash_code,
+ const uint8_t **value,
+ std::size_t *entry_num) const = 0;
+
+ // Return true if key exists in the hash table.
+ virtual bool hasKey(const TypedValue &key) const = 0;
+ virtual bool hasCompositeKey(const std::vector<TypedValue> &key) const = 0;
+
+ // For a resizable HashTable, grow to accomodate more entries. If
+ // 'extra_buckets' is not zero, it may serve as a "hint" to implementations
+ // that at least the requested number of extra buckets are required when
+ // resizing (mainly used in putValueAccessor() and
+ // putValueAccessorCompositeKey() when 'preallocate_supported_' is true).
+ // Implementations are free to ignore 'extra_buckets'. If
+ // 'extra_variable_storage' is not zero, implementations will attempt to
+ // allocate at least enough additional variable-key storage space to
+ // accomodate the number of bytes specified. 'retry_num' is intended ONLY for
+ // when resize() recursively calls itself and should not be set to nonzero by
+ // any other caller.
+ virtual void resize(const std::size_t extra_buckets,
+ const std::size_t extra_variable_storage,
+ const std::size_t retry_num = 0) = 0;
+
+ // In the case where 'allow_duplicate_keys' is true, it is possible to
+ // pre-calculate the number of key-value entries and the amount of
+ // variable-length key storage that will be needed to insert all the
+ // entries from a ValueAccessor in putValueAccessor() or
+ // putValueAccessorCompositeKey() before actually inserting anything. Some
+ // HashTable implemetations (notably SeparateChainingHashTable) can achieve
+ // better performance by ammortizing the cost of allocating certain resources
+ // (buckets and variable-length key storage) in one up-front allocation. This
+ // method is intended to support that. Returns true and fills in
+ // '*prealloc_state' if pre-allocation was successful. Returns false if a
+ // resize() is needed.
+ virtual bool preallocateForBulkInsert(const std::size_t total_entries,
+ const std::size_t total_variable_key_size,
+ HashTablePreallocationState *prealloc_state) {
+ FATAL_ERROR("Called HashTable::preallocateForBulkInsert() on a HashTable "
+ "implementation that does not support preallocation.");
+ }
+
+ // Type(s) of keys.
+ const std::vector<const Type*> key_types_;
+
+ // Information about whether key components are stored inline or in a
+ // separate variable-length storage region. This is usually determined by a
+ // HashTableKeyManager and set by calling setKeyInline().
+ bool scalar_key_inline_;
+ const std::vector<bool> *key_inline_;
+
+ // Whether hashes should be adjusted by AdjustHash() before being used.
+ const bool adjust_hashes_;
+ // Whether it is safe to use the simplified TypedValue::getHashScalarLiteral()
+ // method instead of the generic TypedValue::getHash() method.
+ const bool use_scalar_literal_hash_;
+ // Whether preallocateForBulkInsert() is supported by this HashTable.
+ const bool preallocate_supported_;
+
+ const std::vector<AggregationHandle *> handles_;
+ const std::size_t total_payload_size_;
+ std::vector<std::size_t> payload_offsets_;
+
+ // Used only when resizable is true:
+ StorageManager *storage_manager_;
+ MutableBlobReference blob_;
+ // Locked in shared mode for most operations, exclusive mode during resize.
+ // Not locked at all for non-resizable HashTables.
+ alignas(kCacheLineBytes) SpinSharedMutex<true> resize_shared_mutex_;
+
+ // Used only when resizable is false:
+ void *hash_table_memory_;
+ const std::size_t hash_table_memory_size_;
+virtual size_t get_buckets_allocated() const {return 0;}
+
+ private:
+ // Assign '*key_vector' with the attribute values specified by 'key_attr_ids'
+ // at the current position of 'accessor'. If 'check_for_null_keys' is true,
+ // stops and returns true if any of the values is null, otherwise returns
+ // false.
+ template <typename ValueAccessorT>
+ inline static bool GetCompositeKeyFromValueAccessor(
+ const ValueAccessorT &accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ std::vector<TypedValue> *key_vector) {
+ for (std::vector<attribute_id>::size_type key_idx = 0;
+ key_idx < key_attr_ids.size();
+ ++key_idx) {
+ (*key_vector)[key_idx] = accessor.getTypedValue(key_attr_ids[key_idx]);
+ if (check_for_null_keys && (*key_vector)[key_idx].isNull()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // If run_if_match_found is true, apply the functor to each key if a match is
+ // found; otherwise, apply the functor if no match is found.
+ template <bool run_if_match_found, typename FunctorT>
+ void runOverKeysFromValueAccessor(ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ FunctorT *functor) const;
+
+ template <bool run_if_match_found, typename FunctorT>
+ void runOverKeysFromValueAccessorCompositeKey(
+ ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ FunctorT *functor) const;
+
+ // Method containing the actual logic implementing getAllFromValueAccessor().
+ // Has extra template parameters that control behavior to avoid some
+ // inner-loop branching.
+ template <typename FunctorT,
+ bool check_for_null_keys,
+ bool adjust_hashes_template,
+ bool use_scalar_literal_hash_template>
+ void getAllFromValueAccessorImpl(ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ FunctorT *functor) const;
+
+ // Data structures used for bloom filter optimized semi-joins.
+ bool has_build_side_bloom_filter_ = false;
+ bool has_probe_side_bloom_filter_ = false;
+ BloomFilter *build_bloom_filter_;
+ std::vector<const BloomFilter*> probe_bloom_filters_;
+ std::vector<std::vector<attribute_id>> probe_attribute_ids_;
+ DISALLOW_COPY_AND_ASSIGN(FastHashTable);
+};
+
+
+/**
+ * @brief An instantiation of the HashTable template for use in aggregations.
+ * @note This has force_key_copy = true, so that we don't have dangling pointers
+ * to blocks that are evicted.
+ **/
+using AggregationStateFastHashTable = FastHashTable<true, false, true, false>;
+
+/** @} */
+
+// ----------------------------------------------------------------------------
+// Implementations of template class methods follow.
+
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+HashTablePutResult FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>
+ ::put(const TypedValue &key,
+ const uint8_t &value) {
+ const std::size_t variable_size = (force_key_copy && !scalar_key_inline_) ? key.getDataSize()
+ : 0;
+ if (resizable) {
+ HashTablePutResult result = HashTablePutResult::kOutOfSpace;
+ while (result == HashTablePutResult::kOutOfSpace) {
+ {
+ SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
+ result = putInternal(key, variable_size, value, nullptr);
+ }
+ if (result == HashTablePutResult::kOutOfSpace) {
+ resize(0, variable_size);
+ }
+ }
+ return result;
+ } else {
+ return putInternal(key, variable_size, value, nullptr);
+ }
+}
+
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+HashTablePutResult FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>
+ ::putCompositeKey(const std::vector<TypedValue> &key,
+ const uint8_t& value) {
+ const std::size_t variable_size = calculateVariableLengthCompositeKeyCopySize(key);
+ if (resizable) {
+ HashTablePutResult result = HashTablePutResult::kOutOfSpace;
+ while (result == HashTablePutResult::kOutOfSpace) {
+ {
+ SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
+ result = putCompositeKeyInternal(key, variable_size, value, nullptr);
+ }
+ if (result == HashTablePutResult::kOutOfSpace) {
+ resize(0, variable_size);
+ }
+ }
+ return result;
+ } else {
+ return putCompositeKeyInternal(key, variable_size, value, nullptr);
+ }
+}
+
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+HashTablePutResult FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>
+ ::putCompositeKeyFast(const std::vector<TypedValue> &key,
+ const std::uint8_t* init_value_ptr) {
+ const std::size_t variable_size = calculateVariableLengthCompositeKeyCopySize(key);
+ if (resizable) {
+ HashTablePutResult result = HashTablePutResult::kOutOfSpace;
+ while (result == HashTablePutResult::kOutOfSpace) {
+ {
+ SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
+ result = putCompositeKeyInternalFast(key, variable_size, init_value_ptr, nullptr);
+ }
+ if (result == HashTablePutResult::kOutOfSpace) {
+ resize(0, variable_size);
+ }
+ }
+ return result;
+ } else {
+ return putCompositeKeyInternalFast(key, variable_size, init_value_ptr, nullptr);
+ }
+}
+
+
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+template <typename FunctorT>
+HashTablePutResult FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>
+ ::putValueAccessor(ValueAccessor *accessor,
+ const attribute_id key_attr_id,
+ const bool check_for_null_keys,
+ FunctorT *functor) {
+ HashTablePutResult result = HashTablePutResult::kOutOfSpace;
+ std::size_t variable_size;
+ HashTablePreallocationState prealloc_state;
+ bool using_prealloc = allow_duplicate_keys && preallocate_supported_;
+ return InvokeOnAnyValueAccessor(
+ accessor,
+ [&](auto *accessor) -> HashTablePutResult { // NOLINT(build/c++11)
+ if (using_prealloc) {
+ std::size_t total_entries = 0;
+ std::size_t total_variable_key_size = 0;
+ if (check_for_null_keys || (force_key_copy && !scalar_key_inline_)) {
+ // If we need to filter out nulls OR make variable copies, make a
+ // prepass over the ValueAccessor.
+ while (accessor->next()) {
+ TypedValue key = accessor->getTypedValue(key_attr_id);
+ if (check_for_null_keys && key.isNull()) {
+ continue;
+ }
+ ++total_entries;
+ total_variable_key_size += (force_key_copy && !scalar_key_inline_) ? key.getDataSize() : 0;
+ }
+ accessor->beginIteration();
+ } else {
+ total_entries = accessor->getNumTuples();
+ }
+ if (resizable) {
+ bool prealloc_succeeded = false;
+ while (!prealloc_succeeded) {
+ {
+ SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
+ prealloc_succeeded = this->preallocateForBulkInsert(total_entries,
+ total_variable_key_size,
+ &prealloc_state);
+ }
+ if (!prealloc_succeeded) {
+ this->resize(total_entries, total_variable_key_size);
+ }
+ }
+ } else {
+ using_prealloc = this->preallocateForBulkInsert(total_entries,
+ total_variable_key_size,
+ &prealloc_state);
+ }
+ }
+ std::unique_ptr<BloomFilter> thread_local_bloom_filter;
+ if (has_build_side_bloom_filter_) {
+ thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getRandomSeed(),
+ build_bloom_filter_->getNumberOfHashes(),
+ build_bloom_filter_->getBitArraySize()));
+ }
+ if (resizable) {
+ while (result == HashTablePutResult::kOutOfSpace) {
+ {
+ result = HashTablePutResult::kOK;
+ SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
+ while (accessor->next()) {
+ TypedValue key = accessor->getTypedValue(key_attr_id);
+ if (check_for_null_keys && key.isNull()) {
+ continue;
+ }
+ variable_size = (force_key_copy && !scalar_key_inline_) ? key.getDataSize() : 0;
+ result = this->putInternal(key,
+ variable_size,
+ (*functor)(*accessor),
+ using_prealloc ? &prealloc_state : nullptr);
+ // Insert into bloom filter, if enabled.
+ if (has_build_side_bloom_filter_) {
+ thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()),
+ key.getDataSize());
+ }
+ if (result == HashTablePutResult::kDuplicateKey) {
+ DEBUG_ASSERT(!using_prealloc);
+ return result;
+ } else if (result == HashTablePutResult::kOutOfSpace) {
+ DEBUG_ASSERT(!using_prealloc);
+ break;
+ }
+ }
+ }
+ if (result == HashTablePutResult::kOutOfSpace) {
+ this->resize(0, variable_size);
+ accessor->previous();
+ }
+ }
+ } else {
+ while (accessor->next()) {
+ TypedValue key = accessor->getTypedValue(key_attr_id);
+ if (check_for_null_keys && key.isNull()) {
+ continue;
+ }
+ variable_size = (force_key_copy && !scalar_key_inline_) ? key.getDataSize() : 0;
+ result = this->putInternal(key,
+ variable_size,
+ (*functor)(*accessor),
+ using_prealloc ? &prealloc_state : nullptr);
+ // Insert into bloom filter, if enabled.
+ if (has_build_side_bloom_filter_) {
+ thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()),
+ key.getDataSize());
+ }
+ if (result != HashTablePutResult::kOK) {
+ return result;
+ }
+ }
+ }
+ // Update the build side bloom filter with thread local copy, if available.
+ if (has_build_side_bloom_filter_) {
+ build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get());
+ }
+
+ return HashTablePutResult::kOK;
+ });
+}
+
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+template <typename FunctorT>
+HashTablePutResult FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>
+ ::putValueAccessorCompositeKey(ValueAccessor *accessor,
+ const std::vector<attribute_id> &key_attr_ids,
+ const bool check_for_null_keys,
+ FunctorT *functor) {
+ DEBUG_ASSERT(key_types_.size() == key_attr_ids.size());
+ HashTablePutResult result = HashTablePutResult::kOutOfSpace;
+ std::size_t variable_size;
+ HashTablePreallocationState prealloc_state;
+ bool using_prealloc = allow_duplicate_keys && preallocate_supported_;
+ std::vector<TypedValue> key_vector;
+ key_vector.resize(key_attr_ids.size());
+ return InvokeOnAnyValueAccessor(
+ accessor,
+ [&](auto *accessor) -> HashTablePutResult { // NOLINT(build/c++11)
+ if (using_prealloc) {
+ std::size_t total_entries = 0;
+ std::size_t total_variable_key_size = 0;
+ if (check_for_null_keys || force_key_copy) {
+ // If we need to filter out nulls OR make variable copies, make a
+ // prepass over the ValueAccessor.
+ while (accessor->next()) {
+ if (this->GetCompositeKeyFromValueAccessor(*accessor,
+ key_attr_ids,
+ check_for_null_keys,
+ &key_vector)) {
+ continue;
+ }
+ ++total_entries;
+ total_variable_key_size += this->calculateVariableLengthCompositeKeyCopySize(key_vector);
+ }
+ accessor->beginIteration();
+ } else {
+ total_entries = accessor->getNumTuples();
+ }
+ if (resizable) {
+ bool prealloc_succeeded = false;
+ while (!prealloc_succeeded) {
+ {
+ SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
+ prealloc_succeeded = this->preallocateForBulkInsert(total_entries,
+ total_variable_key_size,
+ &prealloc_state);
+ }
+ if (!prealloc_succeeded) {
+ this->resize(total_entries, total_variable_key_size);
+ }
+ }
+ } else {
+ using_prealloc = this->preallocateForBulkInsert(total_entries,
+ total_variable_key_size,
+ &prealloc_state);
+ }
+ }
+ if (resizable) {
+ while (result == HashTablePutResult::kOutOfSpace) {
+ {
+ result = HashTablePutResult::kOK;
+ SpinSharedMutexSharedLock<true> lock(resize_shared_mutex_);
+ while (accessor->next()) {
+ if (this->GetCompositeKeyFromValueAccessor(*accessor,
+ key_attr_ids,
+ check_for_null_keys,
+ &key_vector)) {
+ continue;
+ }
+ variable_size = this->calculateVariableLengthCompositeKeyCopySize(key_vector);
+ result = this->putCompositeKeyInternal(key_vector,
+ variable_size,
+ (*functor)(*accessor),
+ using_prealloc ? &prealloc_state : nullptr);
+ if (result == HashTablePutResult::kDuplicateKey) {
+ DEBUG_ASSERT(!using_prealloc);
+ return result;
+ } else if (result == HashTablePutResult::kOutOfSpace) {
+ DEBUG_ASSERT(!using_prealloc);
+ break;
+ }
+ }
+ }
+ if (result == HashTablePutResult::kOutOfSpace) {
+ this->resize(0, variable_size);
+ accessor->previous();
+ }
+ }
+ } else {
+ while (accessor->next()) {
+ if (this->GetCompositeKeyFromValueAccessor(*accessor,
+ key_attr_ids,
+ check_for_null_keys,
+ &key_vector)) {
+ continue;
+ }
+ variable_size = this->calculateVariableLengthCompositeKeyCopySize(key_vector);
+ result = this->putCompositeKeyInternal(key_vector,
+ variable_size,
+ (*functor)(*accessor),
+ using_prealloc ? &prealloc_state : nullptr);
+ if (result != HashTablePutResult::kOK) {
+ return result;
+ }
+ }
+ }
+
+ return HashTablePutResult::kOK;
+ });
+}
+
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+template <typename FunctorT>
+bool FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>
+ ::upsert(const TypedValue &key,
+ const uint8_t &initial_value,
+ FunctorT *functor) {
+ DEBUG_ASSERT(!allow_duplicate_keys);
+ const std::size_t variable_size = (force_key_copy && !scalar_key_inline_) ? key.getDataSize() : 0;
+ if (resizable) {
+ for (;;) {
+ {
+ SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_);
+ uint8_t *value = upsertInternal(key, variable_size, initial_value);
+ if (value != nullptr) {
+ (*functor)(value);
+ return true;
+ }
+ }
+ resize(0, force_key_copy && !scalar_key_inline_ ? key.getDataSize() : 0);
+ }
+ } else {
+ uint8_t *value = upsertInternal(key, variable_size, initial_value);
+ if (value == nullptr) {
+ return false;
+ } else {
+ (*functor)(value);
+ return true;
+ }
+ }
+}
+
+template <bool resizable,
+ bool serializable,
+ bool force_key_copy,
+ bool allow_duplicate_keys>
+template <typename FunctorT>
+bool FastHashTable<resizable, serializable, force_key_copy, allow_duplicate_keys>
+ ::upsertCompositeKey(const std::vector<TypedValue> &key,
+ const uint8_t &initial_value,
+ FunctorT *functor) {
+ DEBUG_ASSERT(!allow_duplicate_keys);
+ const std::size_t variable_size = calculateVariableLengthCompositeKeyCopySize(key);
+ if (resizable) {
+ for (;;) {
+ {
+ SpinSharedMutexSharedLock<true> resize_lock(resize_shared_mutex_);
+ uint8_t *value = upsertCompositeKeyInternal(key, variable_size, initial_value);
+ if (value
<TRUNCATED>