You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2017/02/08 08:53:47 UTC
[04/18] incubator-quickstep git commit: - Adds CollisionFreeVectorTable to support specialized fast path aggregation for range-bounded single integer group-by key. - Supports copy elision for aggregation.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d89e4fb/storage/StorageBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageBlock.hpp b/storage/StorageBlock.hpp
index 16ea50f..25d675c 100644
--- a/storage/StorageBlock.hpp
+++ b/storage/StorageBlock.hpp
@@ -27,7 +27,6 @@
 
 #include "catalog/CatalogTypedefs.hpp"
 #include "storage/CountedReference.hpp"
-#include "storage/HashTableBase.hpp"
 #include "storage/IndexSubBlock.hpp"
 #include "storage/StorageBlockBase.hpp"
 #include "storage/StorageBlockInfo.hpp"
@@ -39,11 +38,7 @@
 
 namespace quickstep {
 
-class AggregationHandle;
-class AggregationState;
 class CatalogRelationSchema;
-class ColumnVector;
-class ColumnVectorsValueAccessor;
 class InsertDestinationInterface;
 class Predicate;
 class Scalar;
@@ -431,156 +426,6 @@ class StorageBlock : public StorageBlockBase {
                     InsertDestinationInterface *destination) const;
 
   /**
-   * @brief Perform non GROUP BY aggregation on the tuples in the this storage
-   *        block, returning the aggregated result (for this block) in an
-   *        AggregationState.
-   *
-   * @param handle Aggregation handle that will be used to compute aggregate.
-   * @param arguments The arguments of the aggregate function as expressions.
-   * @param arguments_as_attributes If non-NULL, indicates a valid attribute_id
-   *        for each of the elements in arguments, and is used to elide a copy.
-   *        Has no effect if NULL, or if VECTOR_COPY_ELISION_LEVEL is NONE.
-   * @param filter If non-NULL, then only tuple IDs which are set in the
-   *        filter will be checked (all others will be assumed to be false).
-   *
-   * @return Aggregated state for this block in the form of an
-   *         AggregationState. AggregationHandle::mergeStates() can be called
-   *         to merge with states from other blocks, and
-   *         AggregationHandle::finalize() can be used to generate a final
-   *         result.
-   **/
-  AggregationState* aggregate(
-      const AggregationHandle &handle,
-      const std::vector<std::unique_ptr<const Scalar>> &arguments,
-      const std::vector<attribute_id> *arguments_as_attributes,
-      const TupleIdSequence *filter) const;
-
-  /**
-   * @brief Perform GROUP BY aggregation on the tuples in the this storage
-   *        block.
-   *
-   * @param arguments The arguments to the aggregation function as Scalars.
-   * @param group_by The list of GROUP BY attributes/expressions. The tuples in
-   *        this storage block are grouped by these attributes before
-   *        aggregation.
-   * @param filter If non-NULL, then only tuple IDs which are set in the
-   *        filter will be checked (all others will be assumed to be false).
-   * @param hash_table Hash table to store aggregation state mapped based on
-   *        GROUP BY value list (defined by \c group_by).
-   * @param reuse_group_by_vectors This parameter is used to store and reuse
-   *        GROUP BY attribute vectors pre-computed in an earlier invocation of
-   *        aggregateGroupBy(). \c reuse_group_by_vectors is never \c nullptr
-   *        for ease of use. Current invocation of aggregateGroupBy() will reuse
-   *        ColumnVectors if non-empty, otherwise computes ColumnVectors based
-   *        on \c group_by and stores them in \c reuse_group_by_vectors.
-   *
-   * For sample usage of aggregateGroupBy, see this relevant pseudo-C++ code:
-   * \code
-   * std::vector<std::unique_ptr<ColumnVector>> group_by_vectors;
-   * for each aggregate {
-   *   block.aggregateGroupBy(..., &group_by_vectors);
-   * }
-   * \endcode
-   **/
-  /*
-   * TODO(shoban): Currently, we use ColumnVectorsValueAccessor to compute
-   * temporary result for Scalars of aggregation attributes and GROUP BY
-   * attributes.  We will have to support specifying aggregation and GROUP BY
-   * attributes as std::vector<attribute_id> (like in selectSimple()) for fast
-   * path when there are no expressions specified in the query.
-   */
-  void aggregateGroupBy(
-      const std::vector<std::vector<std::unique_ptr<const Scalar>>> &arguments,
-      const std::vector<std::unique_ptr<const Scalar>> &group_by,
-      const TupleIdSequence *filter,
-      AggregationStateHashTableBase *hash_table,
-      std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const;
-
-
-  /**
-   * @brief Perform the GROUP BY aggregation for the case when aggregation is
-   *        partitioned.
-   *
-   * TODO(harshad) - Refactor this class to use only one function
-   *       aggregateGroupBy.
-   * @note The difference between this method and the aggregateGroupBy method
-   *       is that in this method, the tuples are routed to different HashTables
-   *       based on the partition to which they belong to. The partition is
-   *       determined by the GROUP BY attributes. Right now hash based
-   *       partitioning is performed.
-   *
-   * @note This function only creates the ColumnVectorsValueAccessor needed for
-   *       the insertion in the hash table. The actual insertion in respective
-   *       hash tables should be handled by the caller. See
-   *       AggregationOperationState::aggregateHashTable() for one such
-   *       implementation.
-   *
-   * @param arguments The arguments to the aggregation function as Scalars.
-   * @param group_by The list of GROUP BY attributes/expressions. The tuples in
-   *        this storage block are grouped by these attributes before
-   *        aggregation.
-   * @param filter If non-NULL, then only tuple IDs which are set in the
-   *        filter will be checked (all others will be assumed to be false).
-   * @param num_partitions The number of partitions used for the aggregation.
-   * @param temp_result The ColumnVectorsValueAccessor used for collecting
-   *        the attribute values from this StorageBlock.
-   * @param arguments_ids The attribute IDs used for the aggregation, which
-   *        come from the arguments vector. If arguments is empty, this vector
-   *        is filled with invalid attribute IDs.
-   * @param key_ids The attribute IDs of the group by attributes.
-   * @param reuse_group_by_vectors This parameter is used to store and reuse
-   *        GROUP BY attribute vectors pre-computed in an earlier invocation of
-   *        aggregateGroupBy(). \c reuse_group_by_vectors is never \c nullptr
-   *        for ease of use. Current invocation of aggregateGroupBy() will reuse
-   *        ColumnVectors if non-empty, otherwise computes ColumnVectors based
-   *        on \c group_by and stores them in \c reuse_group_by_vectors.
-   **/
-  void aggregateGroupByPartitioned(
-      const std::vector<std::vector<std::unique_ptr<const Scalar>>> &arguments,
-      const std::vector<std::unique_ptr<const Scalar>> &group_by,
-      const TupleIdSequence *filter,
-      const std::size_t num_partitions,
-      ColumnVectorsValueAccessor *temp_result,
-      std::vector<attribute_id> *argument_ids,
-      std::vector<attribute_id> *key_ids,
-      std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const;
-
-  /**
-   * @brief Inserts the GROUP BY expressions and aggregation arguments together
-   *        as keys into the distinctify hash table.
-   *
-   * This is the first step for DISTINCT aggregation. It populates the distinctify
-   * hash table so that arguments are distinctified within each GROUP BY group.
-   * Later, a second-round aggregation on the distinctify hash table will be
-   * performed to actually compute the aggregated result for each GROUP BY group.
-   *
-   * @param handle Aggregation handle to compute aggregates with.
-   * @param arguments The arguments to the aggregation function as Scalars.
-   * @param arguments_as_attributes If non-NULL, indicates a valid attribute_id
-   *        for each of the elements in arguments, and is used to elide a copy.
-   *        Has no effect if NULL, or if VECTOR_COPY_ELISION_LEVEL is NONE.
-   * @param group_by The list of GROUP BY attributes/expressions.
-   * @param filter If non-NULL, then only tuple IDs which are set in the
-   *        filter will be checked (all others will be assumed to be false).
-   * @param distinctify_hash_table Hash table to store the arguments and GROUP
-   *        BY expressions together as hash table key and a bool constant \c true
-   *        as hash table value. (So the hash table actually serves as a hash set.)
-   * @param reuse_group_by_vectors This parameter is used to store and reuse
-   *        GROUP BY attribute vectors pre-computed in an earlier invocation of
-   *        aggregateGroupBy(). \c reuse_group_by_vectors is never \c nullptr
-   *        for ease of use. Current invocation of aggregateGroupBy() will reuse
-   *        ColumnVectors if non-empty, otherwise computes ColumnVectors based
-   *        on \c group_by and stores them in \c reuse_group_by_vectors.
-   */
-  void aggregateDistinct(const AggregationHandle &handle,
-                         const std::vector<std::unique_ptr<const Scalar>> &arguments,
-                         const std::vector<attribute_id> *arguments_as_attributes,
-                         const std::vector<std::unique_ptr<const Scalar>> &group_by,
-                         const TupleIdSequence *filter,
-                         AggregationStateHashTableBase *distinctify_hash_table,
-                         std::vector<std::unique_ptr<ColumnVector>> *reuse_group_by_vectors) const;
-
-  /**
    * @brief Perform an UPDATE query over the tuples in this StorageBlock.
    * @warning In some edge cases, calling this method may cause IndexSubBlocks
    *          in this block to become inconsistent (the TupleStorageSubBlock
@@ -702,18 +547,6 @@ class StorageBlock : public StorageBlockBase {
       const tuple_id tuple,
       const std::unordered_map<attribute_id, std::unique_ptr<const Scalar>> &assignments) const;
 
-  AggregationState* aggregateHelperColumnVector(
-      const AggregationHandle &handle,
-      const std::vector<std::unique_ptr<const Scalar>> &arguments,
-      const TupleIdSequence *matches) const;
-
-#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
-  AggregationState* aggregateHelperValueAccessor(
-      const AggregationHandle &handle,
-      const std::vector<attribute_id> &argument_ids,
-      const TupleIdSequence *matches) const;
-#endif  // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
-
   // Sort the tuples in storage block based on `sort_attribute'. If
   // `use_input_sequence' is set, we assume a pre-existing order of tuple-id
   // sequence specified by `sorted_sequence' and use stable sort to maintain

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d89e4fb/storage/ValueAccessorMultiplexer.hpp
----------------------------------------------------------------------
diff --git a/storage/ValueAccessorMultiplexer.hpp b/storage/ValueAccessorMultiplexer.hpp
new file mode 100644
index 0000000..fe2fa8e
--- /dev/null
+++ b/storage/ValueAccessorMultiplexer.hpp
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_STORAGE_VALUE_ACCESSOR_MULTIPLEXER_HPP_
+#define QUICKSTEP_STORAGE_VALUE_ACCESSOR_MULTIPLEXER_HPP_
+
+#include <vector>
+
+#include "catalog/CatalogTypedefs.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+
+class ValueAccessor;
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+enum class ValueAccessorSource {
+  kBase = 0,
+  kDerived,
+  kInvalid
+};
+
+/**
+ * @brief A data structure for representing attribute ids referring multiple
+ *        ValueAccessors.
+ */
+struct MultiSourceAttributeId {
+  MultiSourceAttributeId(const ValueAccessorSource in_source,
+                         const attribute_id in_attr_id)
+      : source(in_source),
+        attr_id(in_attr_id) {}
+
+  MultiSourceAttributeId(const MultiSourceAttributeId &other)
+      : source(other.source),
+        attr_id(other.attr_id) {}
+
+  const ValueAccessorSource source;
+  const attribute_id attr_id;
+};
+
+/**
+ * @brief A class that encapsulates multiple ValueAccessors and provides helper
+ *        methods for accessing the ValueAccessors with MultiSourceAttributeId.
+ *
+ * This class is in its very initial form that serves a small set of essential
+ * functionalities for the purpose of aggregation copy elision. That is, given a
+ * storage block to be aggregated on, we may have aggregations on a storage
+ * attribute (e.g. SUM(x)) or on a non-trivial expression (e.g. SUM(x * y)).
+ * For the former case, copy elision is applicable that the attribute gets accessed
+ * directly from the storage block. In the later case, we have to create a
+ * temporary data structure (i.e. ColumnVectorsValueAccessor) that stores the
+ * intermediate results. Thus, we refer to the ValueAccessor created directly
+ * from the storage block as the BASE accessor and the intermediate result
+ * ColumnVectorsValueAccessor as the DERIVED accessor. And we utilize this class
+ * (ValueAccessorMultiplexer) to pass both accessors around to enable copy elision.
+ *
+ * This class (together with ValueAccessorSource and MultiSourceAttributeId)
+ * may be rewritten or exteneded later to more generally support copy elisions
+ * in various scenarios.
+ */
+class ValueAccessorMultiplexer {
+ public:
+  /**
+   * @brief Constructor for base accessor only.
+   *
+   * @param base_accessor The base accessor.
+   */
+  explicit ValueAccessorMultiplexer(ValueAccessor *base_accessor)
+      : base_accessor_(base_accessor),
+        derived_accessor_(nullptr) {}
+
+  /**
+   * @brief Constructor.
+   *
+   * @param base_accessor The base accessor.
+   * @param derived_accessor The derived accessor.
+   */
+  ValueAccessorMultiplexer(ValueAccessor *base_accessor,
+                           ValueAccessor *derived_accessor)
+      : base_accessor_(base_accessor),
+        derived_accessor_(derived_accessor) {}
+
+  /**
+   * @return The base accessor.
+   */
+  inline ValueAccessor* getBaseAccessor() const {
+    return base_accessor_;
+  }
+
+  /**
+   * @return The derived accessor.
+   */
+  inline ValueAccessor* getDerivedAccessor() const {
+    return derived_accessor_;
+  }
+
+  /**
+   * @brief Get the value accessor that corresponds to the specified source.
+   *
+   * @param source The value accessor source.
+   * @return The value accessor that corresponds to \p source.
+   */
+  inline ValueAccessor* getValueAccessorBySource(
+      const ValueAccessorSource &source) const {
+    switch (source) {
+      case ValueAccessorSource::kBase:
+        return base_accessor_;
+      case ValueAccessorSource::kDerived:
+        return derived_accessor_;
+      default:
+        return nullptr;
+    }
+  }
+
+ private:
+  ValueAccessor *base_accessor_;
+  ValueAccessor *derived_accessor_;
+
+  DISALLOW_COPY_AND_ASSIGN(ValueAccessorMultiplexer);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_STORAGE_VALUE_ACCESSOR_MULTIPLEXER_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d89e4fb/utility/BarrieredReadWriteConcurrentBitVector.hpp
----------------------------------------------------------------------
diff --git a/utility/BarrieredReadWriteConcurrentBitVector.hpp b/utility/BarrieredReadWriteConcurrentBitVector.hpp
new file mode 100644
index 0000000..0086c7f
--- /dev/null
+++ b/utility/BarrieredReadWriteConcurrentBitVector.hpp
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_BARRIERED_READ_WRITE_CONCURRENT_BIT_VECTOR_HPP_
+#define QUICKSTEP_UTILITY_BARRIERED_READ_WRITE_CONCURRENT_BIT_VECTOR_HPP_
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+
+#include "utility/BitManipulation.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+/**
+ * @brief A bit vector that supports concurrent read/write operations, with a
+ *        RESTRICTED CONCURRENCY LEVEL that the read operations and the write
+ *        operations must be isolated with a (mostly implicit) barrier.
+ * 
+ * In other words, when using this bit vector, the read operations and write
+ * operations must be grouped into phases. Within a phase there can be either
+ * concurrent read operations or concurrent write operations, but not both (or
+ * the bit vector's behavior will be undefined).
+ **/
+class BarrieredReadWriteConcurrentBitVector {
+ public:
+  /**
+   * @brief Constructor for a bit vector with external storage.
+   *
+   * @param memory_location The location of memory to use for the bit vector.
+   * @param num_bits The length of the bit vector in bits.
+   * @param initialize If true, initialize all the bytes of the memory to 0.
+   */
+  BarrieredReadWriteConcurrentBitVector(void *memory_location,
+                                        const std::size_t num_bits,
+                                        const bool initialize)
+      : owned_(false),
+        num_bits_(num_bits),
+        data_array_(static_cast<DataType *>(memory_location)),
+        data_array_size_((num_bits >> kHigherOrderShift) + (num_bits & kLowerOrderMask ? 1 : 0)) {
+    DCHECK_GT(num_bits, 0u);
+    DCHECK(data_array_ != nullptr);
+
+    if (initialize) {
+      clear();
+    }
+  }
+
+  /**
+   * @brief Constructor for a bit vector which owns its own storage.
+   *
+   * @param num_bits The length of the bit vector in bits.
+   **/
+  explicit BarrieredReadWriteConcurrentBitVector(const std::size_t num_bits)
+      : owned_(true),
+        num_bits_(num_bits),
+        data_array_(static_cast<DataType *>(std::malloc(BytesNeeded(num_bits)))),
+        data_array_size_((num_bits >> kHigherOrderShift) + (num_bits & kLowerOrderMask ? 1 : 0)) {
+    DCHECK_GT(num_bits, 0u);
+    clear();
+  }
+
+  /**
+   * @brief Destructor. Frees any owned memory.
+   */
+  ~BarrieredReadWriteConcurrentBitVector() {
+    if (owned_) {
+      std::free(data_array_);
+    }
+  }
+
+  /**
+   * @brief Calculate the number of bytes needed to store a bit vector of the
+   *        given number of bits.
+   *
+   * @param num_bits The desired length of a BitVector in bits.
+   * @return The number of bytes needed for the BitVector.
+   **/
+  inline static std::size_t BytesNeeded(const std::size_t num_bits) {
+    if (num_bits & kLowerOrderMask) {
+      return ((num_bits >> kHigherOrderShift) + 1) * kDataSize;
+    } else {
+      return (num_bits >> kHigherOrderShift) * kDataSize;
+    }
+  }
+
+  /**
+   * @return The length of this bit vector in bits.
+   **/
+  inline std::size_t size() const {
+    return num_bits_;
+  }
+
+  /**
+   * @brief Clear this bit vector, setting all bits to zero.
+   **/
+  inline void clear() {
+    std::memset(data_array_, 0, BytesNeeded(num_bits_));
+  }
+
+  /**
+   * @brief Get the value of a single bit.
+   *
+   * @param bit_num The position of the desired bit.
+   * @return The value of the bit at bit_num.
+   **/
+  inline bool getBit(const std::size_t bit_num) const {
+    const std::size_t data_value =
+        data_array_[bit_num >> kHigherOrderShift].load(std::memory_order_relaxed);
+    return (data_value << (bit_num & kLowerOrderMask)) & kTopBit;
+  }
+
+  /**
+   * @brief Set the value of a single bit.
+   *
+   * @param bit_num The position of the desired bit.
+   * @param value The new value to set for the bit at bit_num.
+   **/
+  inline void setBit(const std::size_t bit_num) const {
+    data_array_[bit_num >> kHigherOrderShift].fetch_or(
+        kTopBit >> (bit_num & kLowerOrderMask), std::memory_order_relaxed);
+  }
+
+  /**
+   * @brief Find the first 1-bit AT or AFTER the specified position in this bit
+   *        vector.
+   *
+   * @param position The first bit to search for a one.
+   * @return The position of the first one AT or AFTER \p position in this bit
+   *         vector.
+   **/
+  inline std::size_t firstOne(std::size_t position = 0) const {
+    DCHECK_LT(position, num_bits_);
+
+    const std::size_t position_index = position >> kHigherOrderShift;
+    const std::size_t data_value =
+        data_array_[position_index].load(std::memory_order_relaxed)
+            & (std::numeric_limits<std::size_t>::max() >> (position & kLowerOrderMask));
+    if (data_value) {
+      return (position & ~kLowerOrderMask) | leading_zero_count<std::size_t>(data_value);
+    }
+
+    for (std::size_t array_idx = position_index + 1;
+         array_idx < data_array_size_;
+         ++array_idx) {
+      const std::size_t data_value =
+          data_array_[array_idx].load(std::memory_order_relaxed);
+      if (data_value) {
+        return (array_idx << kHigherOrderShift) | leading_zero_count<std::size_t>(data_value);
+      }
+    }
+
+    return num_bits_;
+  }
+
+  /**
+   * @brief Find the first 1-bit AFTER the specified position in this bit vector.
+   *
+   * @param position The first bit to search for a one.
+   * @return The position of the first one AFTER \p position in this bit vector.
+   **/
+  inline std::size_t nextOne(const std::size_t position) const {
+    const std::size_t search_pos = position + 1;
+    return search_pos >= num_bits_ ? num_bits_ : firstOne(search_pos);
+  }
+
+  /**
+   * @brief Count the total number of 1-bits in this bit vector.
+   *
+   * @return The number of ones in this bit vector.
+   **/
+  inline std::size_t onesCount() const {
+    std::size_t count = 0;
+    for (std::size_t array_idx = 0;
+         array_idx < data_array_size_;
+         ++array_idx) {
+      count += population_count<std::size_t>(
+          data_array_[array_idx].load(std::memory_order_relaxed));
+    }
+    return count;
+  }
+
+  /**
+   * @brief Count the total number of 1-bits in this bit vector within the
+   *        specified range (start point INCLUSIVE but end point EXCLUSIVE).
+   *
+   * @param The start position of the range.
+   * @param The end position of the range.
+   *
+   * @return The number of ones within the range, INCLUDING the 1-bit at
+   *         \p start_position, but EXCLUDING the 1-bit at \p end_position.
+   **/
+  inline std::size_t onesCountInRange(const std::size_t start_position,
+                                      const std::size_t end_position) const {
+    DCHECK_LE(start_position, end_position);
+    DCHECK_LT(start_position, num_bits_);
+    DCHECK_LE(end_position, num_bits_);
+
+    const std::size_t start_index = start_position >> kHigherOrderShift;
+    const std::size_t end_index = end_position >> kHigherOrderShift;
+    if (start_index == end_index) {
+      const std::size_t data_value =
+          data_array_[start_index].load(std::memory_order_relaxed)
+              & (std::numeric_limits<std::size_t>::max() >> (start_position & kLowerOrderMask))
+              &  ~(std::numeric_limits<std::size_t>::max() >> (end_position & kLowerOrderMask));
+      return population_count<std::size_t>(data_value);
+    } else {
+      const std::size_t first_data =
+          data_array_[start_index].load(std::memory_order_relaxed)
+              & (std::numeric_limits<std::size_t>::max() >> (start_position & kLowerOrderMask));
+      std::size_t count = population_count<std::size_t>(first_data);
+
+      for (std::size_t array_idx = start_index + 1;
+           array_idx < end_index;
+           ++array_idx) {
+        count += population_count<std::size_t>(
+            data_array_[array_idx].load(std::memory_order_relaxed));
+      }
+
+      const std::size_t last_offset = end_position & kLowerOrderMask;
+      if (last_offset != 0) {
+        const std::size_t last_data =
+            data_array_[end_index].load(std::memory_order_relaxed)
+                &  ~(std::numeric_limits<std::size_t>::max() >> last_offset);
+        count += population_count<std::size_t>(last_data);
+      }
+
+      return count;
+    }
+  }
+
+ private:
+  typedef std::atomic<std::size_t> DataType;
+  static constexpr std::size_t kDataSize = sizeof(DataType);
+
+  // This works as long as the bit-width of size_t is power of 2:
+  static constexpr std::size_t kLowerOrderMask = (sizeof(std::size_t) << 3) - 1;
+  // This works for 32-bit or 64-bit size_t:
+  static constexpr std::size_t kHigherOrderShift = sizeof(std::size_t) == 4 ? 5 : 6;
+
+  static constexpr std::size_t kOne = static_cast<std::size_t>(1);
+  static constexpr std::size_t kTopBit = kOne << kLowerOrderMask;
+
+  const bool owned_;
+  const std::size_t num_bits_;
+  DataType *data_array_;
+  const std::size_t data_array_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(BarrieredReadWriteConcurrentBitVector);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_BARRIERED_READ_WRITE_CONCURRENT_BIT_VECTOR_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2d89e4fb/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index aeff388..ca04462 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -172,6 +172,9 @@ add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory.
 add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp)
 add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
 add_library(quickstep_utility_CompositeHash ../empty_src.cpp CompositeHash.hpp)
+add_library(quickstep_utility_BarrieredReadWriteConcurrentBitVector
+            ../empty_src.cpp
+            BarrieredReadWriteConcurrentBitVector.hpp)
 add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp)
 add_library(quickstep_utility_DisjointTreeForest ../empty_src.cpp DisjointTreeForest.hpp)
 add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp)
@@ -238,6 +241,9 @@ target_link_libraries(quickstep_utility_CompositeHash
                       quickstep_types_TypedValue
                       quickstep_utility_HashPair
                       glog)
+target_link_libraries(quickstep_utility_BarrieredReadWriteConcurrentBitVector
+                      quickstep_utility_BitManipulation
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_utility_DAG
                       glog
                       quickstep_utility_Macros)
@@ -330,6 +336,7 @@ target_link_libraries(quickstep_utility_TreeStringSerializable
 add_library(quickstep_utility ../empty_src.cpp UtilityModule.hpp)
 target_link_libraries(quickstep_utility
                       quickstep_utility_Alignment
+                      quickstep_utility_BarrieredReadWriteConcurrentBitVector
                       quickstep_utility_BitManipulation
                       quickstep_utility_BitVector
                       quickstep_utility_BloomFilter