You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues-all@impala.apache.org by "Tim Armstrong (JIRA)" <ji...@apache.org> on 2018/06/21 21:25:00 UTC
[jira] [Commented] (IMPALA-6425) Change Mempool memory allocation
size to be <1MB to avoid allocating from CentralFreeList
[ https://issues.apache.org/jira/browse/IMPALA-6425?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16519788#comment-16519788 ]
Tim Armstrong commented on IMPALA-6425:
---------------------------------------
I did an experiment with setting it to 512KB and single-node perf results look promising.
{code}
commit 2e41927d29a00ddaa7c18e02650a8a04b028abff
Author: Tim Armstrong <ta...@cloudera.com>
Date: Mon Jun 11 17:40:33 2018 -0700
Decrease chunk size
Change-Id: I57589e4f5840739c4f6d58ed40b73a18afbeb501
diff --git a/be/src/runtime/mem-pool.h b/be/src/runtime/mem-pool.h
index 2d12da8..03f05e2 100644
--- a/be/src/runtime/mem-pool.h
+++ b/be/src/runtime/mem-pool.h
@@ -178,8 +178,9 @@ class MemPool {
static const int INITIAL_CHUNK_SIZE = 4 * 1024;
/// The maximum size of chunk that should be allocated. Allocations larger than this
- /// size will get their own individual chunk.
- static const int MAX_CHUNK_SIZE = 1024 * 1024;
+ /// size will get their own individual chunk. Chosen to be small enough that it gets
+ /// a freelist in TCMalloc's central cache.
+ static const int MAX_CHUNK_SIZE = 512 * 1024;
struct ChunkInfo {
uint8_t* data; // Owned by the ChunkInfo.
{code}
{noformat}
+-------------------+-----------------------+---------+------------+------------+----------------+
| Workload | File Format | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+-------------------+-----------------------+---------+------------+------------+----------------+
| TARGETED-PERF(60) | parquet / none / none | 19.20 | -1.42% | 3.94 | -0.38% |
| TPCH(60) | parquet / none / none | 18.22 | -0.12% | 11.70 | -0.51% |
+-------------------+-----------------------+---------+------------+------------+----------------+
+-------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
| Workload | Query | File Format | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%) | Base StdDev(%) | Num Clients | Iters |
+-------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
| TARGETED-PERF(60) | primitive_filter_string_non_selective | parquet / none / none | 1.64 | 1.52 | +7.92% | * 14.21% * | 8.68% | 1 | 30 |
| TARGETED-PERF(60) | primitive_exchange_broadcast | parquet / none / none | 41.10 | 39.49 | +4.06% | * 10.69% * | 6.88% | 1 | 30 |
| TPCH(60) | TPCH-Q13 | parquet / none / none | 28.02 | 27.25 | +2.86% | 1.38% | 1.55% | 1 | 30 |
| TARGETED-PERF(60) | primitive_filter_bigint_non_selective | parquet / none / none | 1.17 | 1.14 | +2.82% | 7.02% | * 10.13% * | 1 | 30 |
| TARGETED-PERF(60) | primitive_conjunct_ordering_1 | parquet / none / none | 0.12 | 0.12 | +2.67% | * 18.64% * | * 17.54% * | 1 | 30 |
| TARGETED-PERF(60) | primitive_conjunct_ordering_3 | parquet / none / none | 0.26 | 0.25 | +2.55% | 1.11% | 6.01% | 1 | 30 |
| TARGETED-PERF(60) | primitive_topn_bigint | parquet / none / none | 4.39 | 4.30 | +2.08% | * 10.77% * | * 13.97% * | 1 | 30 |
| TARGETED-PERF(60) | string_filter_in_predicate | parquet / none / none | 2.09 | 2.06 | +1.46% | * 10.59% * | 7.01% | 1 | 30 |
| TARGETED-PERF(60) | PERF_AGG-Q7 | parquet / none / none | 2.41 | 2.38 | +1.13% | 6.27% | 5.75% | 1 | 30 |
| TARGETED-PERF(60) | primitive_min_max_runtime_filter | parquet / none / none | 1.91 | 1.89 | +0.99% | 1.90% | 1.90% | 1 | 30 |
| TARGETED-PERF(60) | primitive_orderby_bigint | parquet / none / none | 4.95 | 4.90 | +0.92% | 4.00% | 4.18% | 1 | 30 |
| TPCH(60) | TPCH-Q2 | parquet / none / none | 3.38 | 3.35 | +0.85% | 5.01% | 5.70% | 1 | 30 |
| TARGETED-PERF(60) | primitive_shuffle_join_union_all_with_groupby | parquet / none / none | 57.96 | 57.47 | +0.85% | 0.95% | 1.17% | 1 | 30 |
| TARGETED-PERF(60) | PERF_STRING-Q6 | parquet / none / none | 12.02 | 11.93 | +0.71% | 1.02% | 0.93% | 1 | 30 |
| TARGETED-PERF(60) | PERF_STRING-Q2 | parquet / none / none | 3.65 | 3.63 | +0.69% | 1.15% | 1.24% | 1 | 30 |
| TARGETED-PERF(60) | PERF_AGG-Q3 | parquet / none / none | 5.99 | 5.95 | +0.68% | 3.03% | 3.43% | 1 | 30 |
| TPCH(60) | TPCH-Q7 | parquet / none / none | 47.35 | 47.05 | +0.63% | 1.21% | 1.28% | 1 | 30 |
| TARGETED-PERF(60) | primitive_broadcast_join_1 | parquet / none / none | 0.26 | 0.26 | +0.52% | 4.88% | 4.91% | 1 | 30 |
| TPCH(60) | TPCH-Q21 | parquet / none / none | 93.21 | 92.74 | +0.51% | 1.10% | 0.82% | 1 | 30 |
| TARGETED-PERF(60) | primitive_groupby_bigint_lowndv | parquet / none / none | 2.91 | 2.90 | +0.51% | 5.34% | 5.67% | 1 | 30 |
| TARGETED-PERF(60) | primitive_filter_in_predicate | parquet / none / none | 1.87 | 1.87 | +0.46% | 1.64% | 1.76% | 1 | 30 |
| TARGETED-PERF(60) | primitive_orderby_all | parquet / none / none | 21.60 | 21.51 | +0.41% | 3.85% | 1.99% | 1 | 30 |
| TPCH(60) | TPCH-Q11 | parquet / none / none | 2.74 | 2.73 | +0.31% | 1.73% | 1.30% | 1 | 30 |
| TARGETED-PERF(60) | primitive_shuffle_join_one_to_many_string_with_groupby | parquet / none / none | 177.02 | 176.49 | +0.30% | 0.52% | 0.55% | 1 | 30 |
| TARGETED-PERF(60) | primitive_conjunct_ordering_5 | parquet / none / none | 12.31 | 12.28 | +0.27% | 2.03% | 2.22% | 1 | 30 |
| TARGETED-PERF(60) | PERF_STRING-Q4 | parquet / none / none | 4.19 | 4.17 | +0.26% | 0.92% | 1.07% | 1 | 30 |
| TARGETED-PERF(60) | primitive_filter_string_selective | parquet / none / none | 1.02 | 1.02 | +0.13% | 2.85% | 2.95% | 1 | 30 |
| TARGETED-PERF(60) | primitive_filter_decimal_selective | parquet / none / none | 1.47 | 1.47 | +0.12% | 1.96% | 2.16% | 1 | 30 |
| TPCH(60) | TPCH-Q9 | parquet / none / none | 41.66 | 41.64 | +0.03% | 0.87% | 0.60% | 1 | 30 |
| TPCH(60) | TPCH-Q4 | parquet / none / none | 6.58 | 6.58 | -0.01% | 2.26% | 1.36% | 1 | 30 |
| TPCH(60) | TPCH-Q17 | parquet / none / none | 12.18 | 12.18 | -0.07% | 2.24% | 2.02% | 1 | 30 |
| TPCH(60) | TPCH-Q15 | parquet / none / none | 9.48 | 9.49 | -0.08% | 0.98% | 0.83% | 1 | 30 |
| TARGETED-PERF(60) | PERF_LIMIT-Q1 | parquet / none / none | 0.05 | 0.05 | -0.11% | 0.31% | 1.36% | 1 | 30 |
| TPCH(60) | TPCH-Q18 | parquet / none / none | 36.96 | 37.04 | -0.19% | 1.24% | 0.83% | 1 | 30 |
| TARGETED-PERF(60) | primitive_top-n_all | parquet / none / none | 38.90 | 38.97 | -0.19% | 1.21% | 1.39% | 1 | 30 |
| TARGETED-PERF(60) | primitive_filter_string_like | parquet / none / none | 17.66 | 17.71 | -0.25% | 0.76% | 0.90% | 1 | 30 |
| TARGETED-PERF(60) | primitive_conjunct_ordering_2 | parquet / none / none | 9.81 | 9.83 | -0.28% | 1.20% | 1.29% | 1 | 30 |
| TPCH(60) | TPCH-Q1 | parquet / none / none | 14.20 | 14.24 | -0.32% | 2.89% | 2.71% | 1 | 30 |
| TPCH(60) | TPCH-Q8 | parquet / none / none | 13.79 | 13.84 | -0.33% | 4.13% | 4.04% | 1 | 30 |
| TARGETED-PERF(60) | primitive_groupby_decimal_highndv | parquet / none / none | 15.27 | 15.32 | -0.34% | 2.11% | 2.05% | 1 | 30 |
| TARGETED-PERF(60) | PERF_STRING-Q3 | parquet / none / none | 4.09 | 4.11 | -0.37% | 1.85% | 1.01% | 1 | 30 |
| TARGETED-PERF(60) | PERF_STRING-Q5 | parquet / none / none | 5.03 | 5.05 | -0.37% | 0.88% | 0.95% | 1 | 30 |
| TARGETED-PERF(60) | PERF_STRING-Q7 | parquet / none / none | 8.84 | 8.87 | -0.38% | 1.30% | 1.17% | 1 | 30 |
| TPCH(60) | TPCH-Q20 | parquet / none / none | 6.07 | 6.10 | -0.39% | 1.43% | 1.14% | 1 | 30 |
| TPCH(60) | TPCH-Q10 | parquet / none / none | 12.15 | 12.21 | -0.46% | 1.77% | 1.73% | 1 | 30 |
| TPCH(60) | TPCH-Q6 | parquet / none / none | 3.30 | 3.32 | -0.48% | 1.02% | 1.61% | 1 | 30 |
| TARGETED-PERF(60) | primitive_groupby_bigint_highndv | parquet / none / none | 19.67 | 19.77 | -0.50% | 1.54% | 1.85% | 1 | 30 |
| TARGETED-PERF(60) | PERF_AGG-Q4 | parquet / none / none | 13.08 | 13.15 | -0.53% | 2.40% | 2.16% | 1 | 30 |
| TARGETED-PERF(60) | primitive_small_join_1 | parquet / none / none | 0.24 | 0.24 | -0.58% | * 11.31% * | 9.57% | 1 | 30 |
| TPCH(60) | TPCH-Q16 | parquet / none / none | 5.18 | 5.21 | -0.60% | 3.21% | 3.33% | 1 | 30 |
| TPCH(60) | TPCH-Q19 | parquet / none / none | 11.33 | 11.41 | -0.67% | 1.56% | 1.71% | 1 | 30 |
| TARGETED-PERF(60) | primitive_empty_build_join_1 | parquet / none / none | 3.26 | 3.28 | -0.69% | 0.92% | 1.93% | 1 | 30 |
| TARGETED-PERF(60) | PERF_AGG-Q5 | parquet / none / none | 0.69 | 0.70 | -0.74% | 4.00% | 4.55% | 1 | 30 |
| TARGETED-PERF(60) | PERF_AGG-Q1 | parquet / none / none | 2.15 | 2.16 | -0.79% | 6.32% | 5.80% | 1 | 30 |
| TARGETED-PERF(60) | PERF_STRING-Q1 | parquet / none / none | 3.21 | 3.23 | -0.80% | 1.31% | 1.26% | 1 | 30 |
| TPCH(60) | TPCH-Q22 | parquet / none / none | 6.13 | 6.18 | -0.82% | 2.73% | 2.29% | 1 | 30 |
| TPCH(60) | TPCH-Q3 | parquet / none / none | 15.48 | 15.62 | -0.89% | 3.28% | 1.87% | 1 | 30 |
| TPCH(60) | TPCH-Q12 | parquet / none / none | 7.79 | 7.86 | -0.96% | 1.51% | 1.91% | 1 | 30 |
| TARGETED-PERF(60) | PERF_AGG-Q2 | parquet / none / none | 5.48 | 5.54 | -1.04% | 3.87% | 4.02% | 1 | 30 |
| TARGETED-PERF(60) | PERF_AGG-Q6 | parquet / none / none | 2.41 | 2.44 | -1.16% | * 14.09% * | 8.75% | 1 | 30 |
| TARGETED-PERF(60) | primitive_broadcast_join_2 | parquet / none / none | 1.05 | 1.07 | -1.29% | 3.73% | 4.37% | 1 | 30 |
| TPCH(60) | TPCH-Q14 | parquet / none / none | 7.25 | 7.35 | -1.42% | 1.51% | 1.17% | 1 | 30 |
| TARGETED-PERF(60) | primitive_groupby_bigint_pk | parquet / none / none | 88.29 | 89.58 | -1.44% | 2.34% | 1.97% | 1 | 30 |
| TARGETED-PERF(60) | primitive_filter_decimal_non_selective | parquet / none / none | 1.99 | 2.02 | -1.47% | * 12.36% * | * 20.35% * | 1 | 30 |
| TARGETED-PERF(60) | primitive_broadcast_join_3 | parquet / none / none | 8.80 | 8.97 | -1.89% | 2.01% | 1.80% | 1 | 30 |
| TARGETED-PERF(60) | primitive_conjunct_ordering_4 | parquet / none / none | 0.37 | 0.38 | -3.19% | 6.24% | 6.72% | 1 | 30 |
| TARGETED-PERF(60) | primitive_exchange_shuffle | parquet / none / none | 74.24 | 76.69 | -3.20% | 1.65% | 2.53% | 1 | 30 |
| TARGETED-PERF(60) | primitive_long_predicate | parquet / none / none | 251.27 | 263.07 | -4.49% | 1.02% | 0.84% | 1 | 30 |
| TARGETED-PERF(60) | primitive_filter_bigint_selective | parquet / none / none | 0.15 | 0.15 | -4.74% | * 14.24% * | 8.57% | 1 | 30 |
| TPCH(60) | TPCH-Q5 | parquet / none / none | 16.70 | 18.01 | -7.31% | * 31.82% * | * 27.47% * | 1 | 30 |
| TARGETED-PERF(60) | primitive_groupby_decimal_lowndv.test | parquet / none / none | 2.34 | 2.81 | -16.75% | 4.58% | 4.11% | 1 | 30 |
+-------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
{noformat}
> Change Mempool memory allocation size to be <1MB to avoid allocating from CentralFreeList
> -----------------------------------------------------------------------------------------
>
> Key: IMPALA-6425
> URL: https://issues.apache.org/jira/browse/IMPALA-6425
> Project: IMPALA
> Issue Type: Improvement
> Affects Versions: Impala 2.11.0
> Reporter: Mostafa Mokhtar
> Assignee: Michael Ho
> Priority: Major
>
> While [~tlipcon] was investigating KRPC contention he noticed that MemPool::Allocate is doing 1MB allocations, which is somewhat of an anti-pattern with tcmalloc.
> During the tests, MemPool was doing several thousand 1MB allocs per second, and those have to do a full scan of the tcmalloc span linked list, which is very slow and only gets slower.
> Allocating 1040384 bytes (8192 * 127), on the other hand, is constant time.
>
> It is not clear whether a power-of-2 allocation size would help; it is worth experimenting with both 512KB and 1040384 bytes.
> {code}
> /// The maximum size of chunk that should be allocated. Allocations larger than this
> /// size will get their own individual chunk.
> static const int MAX_CHUNK_SIZE = 8192*127
> {code}
>
> {code}
> #0 0x0000000002097407 in base::internal::SpinLockDelay(int volatile*, int, int) ()
> #1 0x00000000020e2049 in SpinLock::SlowLock() ()
> #2 0x0000000002124348 in tcmalloc::CentralFreeList::Populate() ()
> #3 0x0000000002124458 in tcmalloc::CentralFreeList::FetchFromOneSpansSafe(int, void**, void**) ()
> #4 0x00000000021244e8 in tcmalloc::CentralFreeList::RemoveRange(void**, void**, int) ()
> #5 0x0000000002131ee5 in tcmalloc::ThreadCache::FetchFromCentralCache(unsigned int, int, void* (*)(unsigned long)) ()
> #6 0x0000000000b2879a in impala::MemPool::FindChunk(long, bool) ()
> #7 0x0000000000b364f6 in impala::MemPool::Allocate(long) ()
> #8 0x0000000000b36674 in impala::FreePool::Allocate(long) ()
> #9 0x0000000000b353db in impala::RowBatch::Deserialize(kudu::Slice const&, kudu::Slice const&, long, bool, impala::FreePool*) ()
> #10 0x0000000000b35795 in impala::RowBatch::RowBatch(impala::RowDescriptor const*, impala::RowBatchHeaderPB const&, kudu::Slice const&, kudu::Slice const&, impala::FreePool*) ()
> #11 0x0000000000b1644f in impala::KrpcDataStreamRecvr::SenderQueue::AddBatchWork(long, impala::RowBatchHeaderPB const&, kudu::Slice const&, kudu::Slice const&, boost::unique_lock<impala::SpinLock>*) ()
> #12 0x0000000000b19135 in impala::KrpcDataStreamRecvr::SenderQueue::AddBatch(impala::TransmitDataRequestPB const*, kudu::rpc::RpcContext*) ()
> #13 0x0000000000b0ee30 in impala::KrpcDataStreamMgr::AddData(impala::TransmitDataRequestPB const*, kudu::rpc::RpcContext*) ()
> #14 0x0000000001187035 in kudu::rpc::GeneratedServiceIf::Handle(kudu::rpc::InboundCall*) ()
> #15 0x00000000011bc1cd in impala::ImpalaServicePool::RunThread(long) ()
> {code}
>
> Also, it appears that the thread above was a victim of the thread below; even so, allocations <1MB will make MemPool::Allocate contend less on the CentralFreeList lock.
> {code}
> #0 0x0000003173ae5407 in madvise () from /lib64/libc.so.6
> #1 0x0000000002131cca in TCMalloc_SystemRelease(void*, unsigned long) ()
> #2 0x000000000212f26a in tcmalloc::PageHeap::DecommitSpan(tcmalloc::Span*) ()
> #3 0x000000000212f505 in tcmalloc::PageHeap::MergeIntoFreeList(tcmalloc::Span*) ()
> #4 0x000000000212f864 in tcmalloc::PageHeap::Delete(tcmalloc::Span*) ()
> #5 0x0000000002123cf7 in tcmalloc::CentralFreeList::ReleaseToSpans(void*) ()
> #6 0x0000000002123d9b in tcmalloc::CentralFreeList::ReleaseListToSpans(void*) ()
> #7 0x0000000002124067 in tcmalloc::CentralFreeList::InsertRange(void*, void*, int) ()
> #8 0x00000000021320a4 in tcmalloc::ThreadCache::ReleaseToCentralCache(tcmalloc::ThreadCache::FreeList*, unsigned int, int) ()
> #9 0x0000000002132575 in tcmalloc::ThreadCache::ListTooLong(tcmalloc::ThreadCache::FreeList*, unsigned int) ()
> #10 0x0000000000b276e0 in impala::MemPool::FreeAll() ()
> #11 0x0000000000b34655 in impala::RowBatch::Reset() ()
> #12 0x0000000000fe882f in impala::PartitionedAggregationNode::GetRowsStreaming(impala::RuntimeState*, impala::RowBatch*) ()
> #13 0x0000000000fe9771 in impala::PartitionedAggregationNode::GetNext(impala::RuntimeState*, impala::RowBatch*, bool*) ()
> #14 0x0000000000b78352 in impala::FragmentInstanceState::ExecInternal() ()
> #15 0x0000000000b7adc2 in impala::FragmentInstanceState::Exec() ()
> #16 0x0000000000b6a0da in impala::QueryState::ExecFInstance(impala::FragmentInstanceState*) ()
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscribe@impala.apache.org
For additional commands, e-mail: issues-all-help@impala.apache.org