You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by na...@apache.org on 2016/07/16 22:56:52 UTC
incubator-quickstep git commit: Adaptively adjust the batch size.
Repository: incubator-quickstep
Updated Branches:
refs/heads/expt_bloom_filter_hash_fn 6369ee91c -> d04402d84
Adaptively adjust the batch size.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/d04402d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/d04402d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/d04402d8
Branch: refs/heads/expt_bloom_filter_hash_fn
Commit: d04402d842a1c63cc2000c7a827d4c7386330fd2
Parents: 6369ee9
Author: Navneet Potti <na...@gmail.com>
Authored: Sat Jul 16 17:10:32 2016 -0500
Committer: Navneet Potti <na...@gmail.com>
Committed: Sat Jul 16 17:49:31 2016 -0500
----------------------------------------------------------------------
storage/HashTable.hpp | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/d04402d8/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index 564b783..a74d71c 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -2274,12 +2274,22 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
bloom_filter_adapter.reset(new BloomFilterAdapter(
probe_bloom_filters_, probe_attribute_ids_, attr_size_vectors));
- static const uint32_t kMaxBatchSize = FLAGS_bloom_adapter_batch_size;
+ // We want to have large batch sizes for cache efficiency while probing,
+ // but small batch sizes to ensure that the adaptation logic kicks in
+ // (and does so early). We use exponentially increasing batch sizes to
+ // achieve a balance between the two.
+ //
+ // We also keep track of num_tuples_left in the block, to ensure that
+ // we don't reserve an unnecessarily large vector.
+ std::uint32_t batch_size_try = FLAGS_bloom_adapter_batch_size;
+ std::uint32_t num_tuples_left = accessor->getNumTuples();
std::vector<tuple_id> batch;
- batch.reserve(kMaxBatchSize);
do {
- while (batch.size() < kMaxBatchSize && accessor->next())
+ std::uint32_t batch_size =
+ batch_size_try > num_tuples_left ? batch_size_try : num_tuples_left;
+ batch.reserve(batch_size);
+ while (batch.size() < batch_size && accessor->next())
batch.push_back(accessor->getCurrentPosition());
std::size_t num_hits = bloom_filter_adapter->bulkProbe(accessor, batch);
@@ -2303,6 +2313,8 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
}
}
batch.clear();
+ num_tuples_left -= batch_size;
+ batch_size_try = batch_size * 2;
} while (!accessor->iterationFinished());
}