Posted to user@cassandra.apache.org by Dathan Pattishall <da...@gmail.com> on 2010/07/26 21:22:46 UTC

what causes MESSAGE-DESERIALIZER-POOL to spike

I have 4 nodes on enterprise-type hardware (lots of RAM: 12GB, 16 i7 cores,
RAID disks).

~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
Pool Name                    Active   Pending      Completed
STREAM-STAGE                      0         0              0
RESPONSE-STAGE                    0         0         516280
ROW-READ-STAGE                    8      4096        1164326
LB-OPERATIONS                     0         0              0
MESSAGE-DESERIALIZER-POOL         1    682008        1818682
GMFD                              0         0           6467
LB-TARGET                         0         0              0
CONSISTENCY-MANAGER               0         0         661477
ROW-MUTATION-STAGE                0         0         998780
MESSAGE-STREAMING-POOL            0         0              0
LOAD-BALANCER-STAGE               0         0              0
FLUSH-SORTER-POOL                 0         0              0
MEMTABLE-POST-FLUSHER             0         0              4
FLUSH-WRITER-POOL                 0         0              4
AE-SERVICE-STAGE                  0         0              0
HINTED-HANDOFF-POOL               0         0              3

EQX root@cass04:~# vmstat -n 1

procs -----------memory---------- ---swap-- -----io---- --system-- -----cpu------
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5  1 94  0  0
 2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2  1 79 18  0
 1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2  1 81 16  0
 0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2  1 80 17  0
 2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2  1 83 14  0
 2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3  1 77 20  0
 1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2  1 75 22  0
 3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2  1 81 16  0
 3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225  2  1 77 19  0
 1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2  1 80 18  0
 0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2  1 79 18  0
 9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1  1 82 16  0
 3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2  1 84 13  0
 1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068  2  1 76 21  0


But the 16 cores are hardly utilized, which indicates to me there is some
bad thread thrashing, but why?



  1  [|||||                                               8.3%]     Tasks: 1070 total, 1 running
  2  [                                                    0.0%]     Load average: 8.34 9.05 8.82
  3  [                                                    0.0%]     Uptime: 192 days(!), 15:29:52
  4  [|||||||||||                                        17.9%]
  5  [|||||                                               5.7%]
  6  [||                                                  1.3%]
  7  [||                                                  2.6%]
  8  [|                                                   0.6%]
  9  [|                                                   0.6%]
  10 [||                                                  1.9%]
  11 [||                                                  1.9%]
  12 [||                                                  1.9%]
  13 [||                                                  1.3%]
  14 [|                                                   0.6%]
  15 [||                                                  1.3%]
  16 [|                                                   0.6%]
  Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
  Swp[|                                               6/1983MB]

  PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
/opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
Okay, so why would the pending count be so large? I looked at iostat and the
disk load is not bad at all. The service times etc. are all good.
If it's not disk-based I/O, then is it thread thrashing? If so, which thread
pool should I monitor more closely?


MESSAGE-DESERIALIZER-POOL:1

which would indicate there is one thread. If the design dictated one thread,
I would assume that it's a service that should not have a pending queue.
Maybe there is a limitation in Cassandra.

Name: MESSAGE-DESERIALIZER-POOL:1
State: WAITING on
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject@3c48e8f5
Total blocked: 10  Total waited: 5,427,548

Stack trace:
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1925)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:358)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:619)



This shows that there is a huge amount of waiting. Is there a tunable that I
can raise to remove this queue? Is it okay for the waits to be this high? If
not, what is a bad threshold?
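
Side note: as far as I can tell, that stack by itself is just what an idle
ThreadPoolExecutor worker looks like while parked in LinkedBlockingQueue.take()
waiting for its next task, and jconsole's "Total waited" is a count of waits
rather than a duration. A minimal sketch (plain JDK, nothing Cassandra-specific)
that reproduces the same stack shape:

import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class IdleWorkerStack {
    public static void main(String[] args) throws Exception {
        // Single worker over an unbounded LinkedBlockingQueue, the same
        // shape as MESSAGE-DESERIALIZER-POOL:1.
        ExecutorService pool = Executors.newSingleThreadExecutor(
                r -> new Thread(r, "DEMO-DESERIALIZER-POOL:1"));
        pool.execute(() -> { });   // run one task so the worker thread exists
        Thread.sleep(200);         // let it go back to waiting on take()

        ThreadMXBean threads = ManagementFactory.getThreadMXBean();
        for (ThreadInfo info : threads.dumpAllThreads(false, false)) {
            if (info.getThreadName().startsWith("DEMO-DESERIALIZER-POOL")) {
                // Prints State: WAITING with LinkedBlockingQueue.take() in
                // the stack, much like the jconsole output above.
                System.out.print(info);
            }
        }
        pool.shutdownNow();
    }
}

So the stack alone doesn't say whether this pool is the bottleneck; the 682008
pending figure from tpstats is the more telling number.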


On Mon, Jul 26, 2010 at 1:31 PM, Peter Schuller <peter.schuller@infidyne.com> wrote:

> > How does one set/get the read-concurrency? Also which read-concurrency, for
> > which pool? According to the threads tab in jconsole read-concurrency could
> > be among different pools. There are row-read-stage threads, lots of write
> > threads, row mutation stage etc.
>
> Short version: Ignore my concurrency speculation as it doesn't apply.
>
> Long version:
>
> I meant the ConcurrentReads settings in
> storage-conf.xml/cassandra.yaml (depending on version), but I was
> speculating.
>
> Having a look now, I presume it's not set to 1 because a minimum of 2
> seems to be required (DatabaseDescriptor.java). And since you ask
> where, I suspect it's at its default value of 8 anyway.
>
> Also, looking at StageManager it appears this is applied to
> ROW-READ-STAGE so is indeed unrelated to MESSAGE-DESERIALIZER-POOL.
>
> MESSAGE-DESERIALIZER-POOL seems to be unrelated to disk I/O
> altogether and seems to be used by MessagingService.java. So - please
> ignore my read concurrency speculation as it doesn't apply.
>
> --
> / Peter Schuller
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Peter Schuller <pe...@infidyne.com>.
> How does one set/get the read-concurrency? Also which read-concurrency, for
> which pool? According to the threads tab in jconsole read-concurrency could
> be among different pools. There are row-read-stage threads, lots of write
> threads, row mutation stage etc.

Short version: Ignore my concurrency speculation as it doesn't apply.

Long version:

I meant the ConcurrentReads settings in
storage-conf.xml/cassandra.yaml (depending on version), but I was
speculating.

Having a look now, I presume it's not set to 1 because a minimum of 2
seems to be required (DatabaseDescriptor.java). And since you ask
where, I suspect it's at its default value of 8 anyway.
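
For reference (exact spelling from memory, so treat it as an assumption to
verify against your version), the knob lives in the node's config file, e.g.:

  <!-- storage-conf.xml, 0.6.x -->
  <ConcurrentReads>8</ConcurrentReads>

  # cassandra.yaml, 0.7+
  concurrent_reads: 8

Raising it lets more reads run against the disks in parallel, but as noted
below it governs ROW-READ-STAGE rather than MESSAGE-DESERIALIZER-POOL.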

Also, looking at StageManager it appears this is applied to
ROW-READ-STAGE so is indeed unrelated to MESSAGE-DESERIALIZER-POOL.

MESSAGE-DESERIALIZER-POOL seems to be unrelated to disk I/O
altogether and seems to be used by MessagingService.java. So - please
ignore my read concurrency speculation as it doesn't apply.

-- 
/ Peter Schuller

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
How does one set/get the read-concurrency? Also which read-concurrency, for
which pool? According to the threads tab in jconsole read-concurrency could
be among different pools. There are row-read-stage threads, lots of write
threads, row mutation stage etc.






On Mon, Jul 26, 2010 at 12:56 PM, Peter Schuller <peter.schuller@infidyne.com> wrote:

> > MESSAGE-DESERIALIZER-POOL         1    682008        1818682
>
> On the (unverified by me) assumption that disk reads end up in this
> stage, as Anthony indicates, looking at the above line and the
> iostat output makes me want to suggest that maybe the read
> concurrency is set to 1 and you need to increase it in order to fully
> utilize your disks.
>
> That said, I have no idea why you would have as many as 682 THOUSAND
> pending for that stage (but I'm not sure at what granularity these are
> scheduled so perhaps it is normal for e.g. big slicing reads? I don't
> know.).
>
> --
> / Peter Schuller
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Peter Schuller <pe...@infidyne.com>.
> MESSAGE-DESERIALIZER-POOL         1    682008        1818682

On the (unverified by me) assumption that disk reads end up in this
stage, as Anthony indicates, looking at the above line and the
iostat output makes me want to suggest that maybe the read
concurrency is set to 1 and you need to increase it in order to fully
utilize your disks.

That said, I have no idea why you would have as many as 682 THOUSAND
pending for that stage (but I'm not sure at what granularity these are
scheduled so perhaps it is normal for e.g. big slicing reads? I don't
know.).

-- 
/ Peter Schuller

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
@Garo htop.

On Mon, Jul 26, 2010 at 11:28 PM, Juho Mäkinen <ju...@gmail.com> wrote:

> Off topic, but what was this tool which prints per cpu utilization?
>
>  - Garo
>
> On Mon, Jul 26, 2010 at 10:22 PM, Dathan Pattishall <da...@gmail.com>
> wrote:
> > But the 16 cores are hardly utilized. Which indicates to me there is some
> > bad thread thrashing, but why?
> >
> >   1  [|||||                                               8.3%]
> Tasks:
> > 1070 total, 1 running
> >   2  [                                                    0.0%]     Load
> > average: 8.34 9.05 8.82
> >   3  [                                                    0.0%]
> Uptime:
> > 192 days(!), 15:29:52
> >   4  [|||||||||||                                        17.9%]
> > ...
> >   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
> >   Swp[|                                               6/1983MB]
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Juho Mäkinen <ju...@gmail.com>.
Off topic, but what was this tool which prints per-CPU utilization?

 - Garo

On Mon, Jul 26, 2010 at 10:22 PM, Dathan Pattishall <da...@gmail.com> wrote:
> But the 16 cores are hardly utilized. Which indicates to me there is some
> bad thread thrashing, but why?
>
>   1  [|||||                                               8.3%]     Tasks:
> 1070 total, 1 running
>   2  [                                                    0.0%]     Load
> average: 8.34 9.05 8.82
>   3  [                                                    0.0%]     Uptime:
> 192 days(!), 15:29:52
>   4  [|||||||||||                                        17.9%]
> ...
>   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
>   Swp[|                                               6/1983MB]

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Jonathan Ellis <jb...@gmail.com>.
Let's centralize that conversation over on
https://issues.apache.org/jira/browse/CASSANDRA-1358

On Wed, Aug 4, 2010 at 4:32 PM, Mike Malone <mi...@simplegeo.com> wrote:
> So after 4096 messages get pushed on the row-read-stage queue (or any other
> multiThreadedStage) the deserializer basically becomes a single-threaded
> blocking queue that prevents any other inter-node RPC from occurring..?
> Sounds like it's a problem either way. If the row read stage is what's
> backed up, why not have the messages stack up on that stage?
> Mike
>
> On Wed, Aug 4, 2010 at 11:46 AM, Jonathan Ellis <jb...@gmail.com> wrote:
>>
>> No, MDP is backing up because Row-Read-Stage [the stage after MDP on
>> reads] is full at 4096, meaning you're not able to process reads as
>> quickly as the requests are coming in.
>>
>> On Wed, Aug 4, 2010 at 2:21 PM, Mike Malone <mi...@simplegeo.com> wrote:
>> > This may be your
>> > problem: https://issues.apache.org/jira/browse/CASSANDRA-1358
>> > The message deserializer executor is being created with a core pool size
>> > of
>> > 1. Since it uses a queue with unbounded capacity new requests are always
>> > queued and the thread pool never grows. So the message deserializer
>> > becomes
>> > a single-threaded bottleneck through which all traffic must pass. So
>> > your 16
>> > cores are reduced to one core for handling all inter-node communication
>> > (and
>> > any intra-node communication that's being passed through the messaging
>> > service).
>> > Mike
>> >
>> > On Tue, Aug 3, 2010 at 10:02 PM, Dathan Pattishall <da...@gmail.com>
>> > wrote:
>> >>
>> >> The output of htop shows threads as procs with a breakdown of how much
>> >> cpu
>> >> /etc per thread (in ncurses color!). All of these Java "procs" are just
>> >> Java
>> >> threads of only 1 instance of Cassandra per Server.
>> >>
>> >> On Sat, Jul 31, 2010 at 3:45 PM, Benjamin Black <b...@b3k.us> wrote:
>> >>>
>> >>> Sorry, I just noticed: are you running 14 instances of Cassandra on a
>> >>> single physical machine or are all those java processes something
>> >>> else?
>> >>>
>> >>> On Mon, Jul 26, 2010 at 12:22 PM, Dathan Pattishall
>> >>> <da...@gmail.com>
>> >>> wrote:
>> >>> > I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7
>> >>> > cores,
>> >>> > RAID Disks).
>> >>> >
>> >>> > ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
>> >>> > Pool Name                    Active   Pending      Completed
>> >>> > STREAM-STAGE                      0         0              0
>> >>> > RESPONSE-STAGE                    0         0         516280
>> >>> > ROW-READ-STAGE                    8      4096        1164326
>> >>> > LB-OPERATIONS                     0         0              0
>> >>> > MESSAGE-DESERIALIZER-POOL         1    682008        1818682
>> >>> > GMFD                              0         0           6467
>> >>> > LB-TARGET                         0         0              0
>> >>> > CONSISTENCY-MANAGER               0         0         661477
>> >>> > ROW-MUTATION-STAGE                0         0         998780
>> >>> > MESSAGE-STREAMING-POOL            0         0              0
>> >>> > LOAD-BALANCER-STAGE               0         0              0
>> >>> > FLUSH-SORTER-POOL                 0         0              0
>> >>> > MEMTABLE-POST-FLUSHER             0         0              4
>> >>> > FLUSH-WRITER-POOL                 0         0              4
>> >>> > AE-SERVICE-STAGE                  0         0              0
>> >>> > HINTED-HANDOFF-POOL               0         0              3
>> >>> >
>> >>> > EQX root@cass04:~# vmstat -n 1
>> >>> >
>> >>> > procs -----------memory---------- ---swap-- -----io---- --system--
>> >>> > -----cpu------
>> >>> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us
>> >>> > sy
>> >>> > id
>> >>> > wa st
>> >>> >  6 10   7096 121816  16244 10375492    0    0     1     3    0    0
>> >>> > 5
>> >>> > 1
>> >>> > 94  0  0
>> >>> >  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820
>> >>> > 2  1
>> >>> > 79 18  0
>> >>> >  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878
>> >>> > 2  1
>> >>> > 81 16  0
>> >>> >  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753
>> >>> > 2  1
>> >>> > 80 17  0
>> >>> >  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461
>> >>> > 2  1
>> >>> > 83 14  0
>> >>> >  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511
>> >>> > 3  1
>> >>> > 77 20  0
>> >>> >  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981
>> >>> > 2  1
>> >>> > 75 22  0
>> >>> >  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436
>> >>> > 2  1
>> >>> > 81 16  0
>> >>> >  3  9   7096 117928  16260 10370064    0    0  5988     0 21505
>> >>> > 10225
>> >>> > 2  1
>> >>> > 77 19  0
>> >>> >  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602
>> >>> > 2  1
>> >>> > 80 18  0
>> >>> >  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158
>> >>> > 2  1
>> >>> > 79 18  0
>> >>> >  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489
>> >>> > 1  1
>> >>> > 82 16  0
>> >>> >  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459
>> >>> > 2  1
>> >>> > 84 13  0
>> >>> >  1  9   7096 127300  16236 10353440    0    0  5684     0 38914
>> >>> > 10068
>> >>> > 2  1
>> >>> > 76 21  0
>> >>> >
>> >>> >
>> >>> > But the 16 cores are hardly utilized. Which indicates to me there is
>> >>> > some
>> >>> > bad thread thrashing, but why?
>> >>> >
>> >>> >
>> >>> >
>> >>> >   1  [|||||                                               8.3%]
>> >>> > Tasks:
>> >>> > 1070 total, 1 running
>> >>> >   2  [                                                    0.0%]
>> >>> > Load
>> >>> > average: 8.34 9.05 8.82
>> >>> >   3  [                                                    0.0%]
>> >>> > Uptime:
>> >>> > 192 days(!), 15:29:52
>> >>> >   4  [|||||||||||                                        17.9%]
>> >>> >   5  [|||||                                               5.7%]
>> >>> >   6  [||                                                  1.3%]
>> >>> >   7  [||                                                  2.6%]
>> >>> >   8  [|                                                   0.6%]
>> >>> >   9  [|                                                   0.6%]
>> >>> >   10 [||                                                  1.9%]
>> >>> >   11 [||                                                  1.9%]
>> >>> >   12 [||                                                  1.9%]
>> >>> >   13 [||                                                  1.3%]
>> >>> >   14 [|                                                   0.6%]
>> >>> >   15 [||                                                  1.3%]
>> >>> >   16 [|                                                   0.6%]
>> >>> >   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
>> >>> >   Swp[|                                               6/1983MB]
>> >>> >
>> >>> >   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+
>> >>> > Command
>> >>> > 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
>> >>> > 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> > 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
>> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
>> >>> > -XX:+UseConcMark
>> >>> >
>> >>
>> >
>> >
>>
>>
>>
>> --
>> Jonathan Ellis
>> Project Chair, Apache Cassandra
>> co-founder of Riptano, the source for professional Cassandra support
>> http://riptano.com
>
>



-- 
Jonathan Ellis
Project Chair, Apache Cassandra
co-founder of Riptano, the source for professional Cassandra support
http://riptano.com

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Mike Malone <mi...@simplegeo.com>.
So after 4096 messages get pushed onto the row-read-stage queue (or any other
multiThreadedStage), the deserializer basically becomes a single-threaded
blocking queue that prevents any other inter-node RPC from occurring...?
Sounds like it's a problem either way. If the row read stage is what's
backed up, why not have the messages stack up on that stage?

Mike

On Wed, Aug 4, 2010 at 11:46 AM, Jonathan Ellis <jb...@gmail.com> wrote:

> No, MDP is backing up because Row-Read-Stage [the stage after MDP on
> reads] is full at 4096, meaning you're not able to process reads as
> quickly as the requests are coming in.
>
> On Wed, Aug 4, 2010 at 2:21 PM, Mike Malone <mi...@simplegeo.com> wrote:
> > This may be your
> > problem: https://issues.apache.org/jira/browse/CASSANDRA-1358
> > The message deserializer executor is being created with a core pool size
> of
> > 1. Since it uses a queue with unbounded capacity new requests are always
> > queued and the thread pool never grows. So the message deserializer
> becomes
> > a single-threaded bottleneck through which all traffic must pass. So your
> 16
> > cores are reduced to one core for handling all inter-node communication
> (and
> > any intra-node communication that's being passed through the messaging
> > service).
> > Mike
> >
> > On Tue, Aug 3, 2010 at 10:02 PM, Dathan Pattishall <da...@gmail.com>
> > wrote:
> >>
> >> The output of htop shows threads as procs with a breakdown of how much
> cpu
> >> /etc per thread (in ncurses color!). All of these Java "procs" are just
> Java
> >> threads of only 1 instance of Cassandra per Server.
> >>
> >> On Sat, Jul 31, 2010 at 3:45 PM, Benjamin Black <b...@b3k.us> wrote:
> >>>
> >>> Sorry, I just noticed: are you running 14 instances of Cassandra on a
> >>> single physical machine or are all those java processes something
> >>> else?
> >>>
> >>> On Mon, Jul 26, 2010 at 12:22 PM, Dathan Pattishall <
> dathanvp@gmail.com>
> >>> wrote:
> >>> > I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7
> >>> > cores,
> >>> > RAID Disks).
> >>> >
> >>> > ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
> >>> > Pool Name                    Active   Pending      Completed
> >>> > STREAM-STAGE                      0         0              0
> >>> > RESPONSE-STAGE                    0         0         516280
> >>> > ROW-READ-STAGE                    8      4096        1164326
> >>> > LB-OPERATIONS                     0         0              0
> >>> > MESSAGE-DESERIALIZER-POOL         1    682008        1818682
> >>> > GMFD                              0         0           6467
> >>> > LB-TARGET                         0         0              0
> >>> > CONSISTENCY-MANAGER               0         0         661477
> >>> > ROW-MUTATION-STAGE                0         0         998780
> >>> > MESSAGE-STREAMING-POOL            0         0              0
> >>> > LOAD-BALANCER-STAGE               0         0              0
> >>> > FLUSH-SORTER-POOL                 0         0              0
> >>> > MEMTABLE-POST-FLUSHER             0         0              4
> >>> > FLUSH-WRITER-POOL                 0         0              4
> >>> > AE-SERVICE-STAGE                  0         0              0
> >>> > HINTED-HANDOFF-POOL               0         0              3
> >>> >
> >>> > EQX root@cass04:~# vmstat -n 1
> >>> >
> >>> > procs -----------memory---------- ---swap-- -----io---- --system--
> >>> > -----cpu------
> >>> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us
> sy
> >>> > id
> >>> > wa st
> >>> >  6 10   7096 121816  16244 10375492    0    0     1     3    0    0
> 5
> >>> > 1
> >>> > 94  0  0
> >>> >  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820
> >>> > 2  1
> >>> > 79 18  0
> >>> >  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878
> >>> > 2  1
> >>> > 81 16  0
> >>> >  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753
> >>> > 2  1
> >>> > 80 17  0
> >>> >  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461
> >>> > 2  1
> >>> > 83 14  0
> >>> >  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511
> >>> > 3  1
> >>> > 77 20  0
> >>> >  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981
> >>> > 2  1
> >>> > 75 22  0
> >>> >  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436
> >>> > 2  1
> >>> > 81 16  0
> >>> >  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225
> >>> > 2  1
> >>> > 77 19  0
> >>> >  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602
> >>> > 2  1
> >>> > 80 18  0
> >>> >  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158
> >>> > 2  1
> >>> > 79 18  0
> >>> >  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489
> >>> > 1  1
> >>> > 82 16  0
> >>> >  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459
> >>> > 2  1
> >>> > 84 13  0
> >>> >  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068
> >>> > 2  1
> >>> > 76 21  0
> >>> >
> >>> >
> >>> > But the 16 cores are hardly utilized. Which indicates to me there is
> >>> > some
> >>> > bad thread thrashing, but why?
> >>> >
> >>> >
> >>> >
> >>> >   1  [|||||                                               8.3%]
> >>> > Tasks:
> >>> > 1070 total, 1 running
> >>> >   2  [                                                    0.0%]
> >>> > Load
> >>> > average: 8.34 9.05 8.82
> >>> >   3  [                                                    0.0%]
> >>> > Uptime:
> >>> > 192 days(!), 15:29:52
> >>> >   4  [|||||||||||                                        17.9%]
> >>> >   5  [|||||                                               5.7%]
> >>> >   6  [||                                                  1.3%]
> >>> >   7  [||                                                  2.6%]
> >>> >   8  [|                                                   0.6%]
> >>> >   9  [|                                                   0.6%]
> >>> >   10 [||                                                  1.9%]
> >>> >   11 [||                                                  1.9%]
> >>> >   12 [||                                                  1.9%]
> >>> >   13 [||                                                  1.3%]
> >>> >   14 [|                                                   0.6%]
> >>> >   15 [||                                                  1.3%]
> >>> >   16 [|                                                   0.6%]
> >>> >   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
> >>> >   Swp[|                                               6/1983MB]
> >>> >
> >>> >   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
> >>> > 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
> >>> > 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> > 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
> >>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC
> -XX:+UseConcMark
> >>> >
> >>
> >
> >
>
>
>
> --
> Jonathan Ellis
> Project Chair, Apache Cassandra
> co-founder of Riptano, the source for professional Cassandra support
> http://riptano.com
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Jonathan Ellis <jb...@gmail.com>.
No, MDP is backing up because Row-Read-Stage [the stage after MDP on
reads] is full at 4096, meaning you're not able to process reads as
quickly as the requests are coming in.
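
To illustrate the shape of that back-pressure (a toy sketch, not Cassandra's
actual stage code): a single deserializer thread drains an unbounded inbound
queue and hands work to a bounded downstream stage; once the downstream queue
is full the hand-off blocks, so the backlog shows up on the deserializer's own
unbounded queue. That matches MESSAGE-DESERIALIZER-POOL sitting at 682008
pending while ROW-READ-STAGE is pinned at 4096.

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

public class BackPressureSketch {
    public static void main(String[] args) throws Exception {
        BlockingQueue<String> inbound = new LinkedBlockingQueue<>();          // unbounded, like MDP's queue
        BlockingQueue<String> rowReadStage = new ArrayBlockingQueue<>(4096);  // bounded, like ROW-READ-STAGE

        Thread deserializer = new Thread(() -> {
            try {
                while (true) {
                    rowReadStage.put(inbound.take());  // blocks once the stage is full
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }, "toy-deserializer");
        deserializer.setDaemon(true);
        deserializer.start();

        // Nothing drains rowReadStage here (reads are "too slow"), so it caps
        // at 4096 and everything else piles up behind the deserializer.
        for (int i = 0; i < 100_000; i++) {
            inbound.put("read-request-" + i);
        }
        Thread.sleep(500);
        System.out.println("ROW-READ-STAGE pending: " + rowReadStage.size());  // about 4096
        System.out.println("deserializer pending:   " + inbound.size());       // most of the 100,000
    }
}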

On Wed, Aug 4, 2010 at 2:21 PM, Mike Malone <mi...@simplegeo.com> wrote:
> This may be your
> problem: https://issues.apache.org/jira/browse/CASSANDRA-1358
> The message deserializer executor is being created with a core pool size of
> 1. Since it uses a queue with unbounded capacity new requests are always
> queued and the thread pool never grows. So the message deserializer becomes
> a single-threaded bottleneck through which all traffic must pass. So your 16
> cores are reduced to one core for handling all inter-node communication (and
> any intra-node communication that's being passed through the messaging
> service).
> Mike
>
> On Tue, Aug 3, 2010 at 10:02 PM, Dathan Pattishall <da...@gmail.com>
> wrote:
>>
>> The output of htop shows threads as procs with a breakdown of how much cpu
>> /etc per thread (in ncurses color!). All of these Java "procs" are just Java
>> threads of only 1 instance of Cassandra per Server.
>>
>> On Sat, Jul 31, 2010 at 3:45 PM, Benjamin Black <b...@b3k.us> wrote:
>>>
>>> Sorry, I just noticed: are you running 14 instances of Cassandra on a
>>> single physical machine or are all those java processes something
>>> else?
>>>
>>> On Mon, Jul 26, 2010 at 12:22 PM, Dathan Pattishall <da...@gmail.com>
>>> wrote:
>>> > I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7
>>> > cores,
>>> > RAID Disks).
>>> >
>>> > ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
>>> > Pool Name                    Active   Pending      Completed
>>> > STREAM-STAGE                      0         0              0
>>> > RESPONSE-STAGE                    0         0         516280
>>> > ROW-READ-STAGE                    8      4096        1164326
>>> > LB-OPERATIONS                     0         0              0
>>> > MESSAGE-DESERIALIZER-POOL         1    682008        1818682
>>> > GMFD                              0         0           6467
>>> > LB-TARGET                         0         0              0
>>> > CONSISTENCY-MANAGER               0         0         661477
>>> > ROW-MUTATION-STAGE                0         0         998780
>>> > MESSAGE-STREAMING-POOL            0         0              0
>>> > LOAD-BALANCER-STAGE               0         0              0
>>> > FLUSH-SORTER-POOL                 0         0              0
>>> > MEMTABLE-POST-FLUSHER             0         0              4
>>> > FLUSH-WRITER-POOL                 0         0              4
>>> > AE-SERVICE-STAGE                  0         0              0
>>> > HINTED-HANDOFF-POOL               0         0              3
>>> >
>>> > EQX root@cass04:~# vmstat -n 1
>>> >
>>> > procs -----------memory---------- ---swap-- -----io---- --system--
>>> > -----cpu------
>>> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy
>>> > id
>>> > wa st
>>> >  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5
>>> > 1
>>> > 94  0  0
>>> >  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820
>>> > 2  1
>>> > 79 18  0
>>> >  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878
>>> > 2  1
>>> > 81 16  0
>>> >  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753
>>> > 2  1
>>> > 80 17  0
>>> >  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461
>>> > 2  1
>>> > 83 14  0
>>> >  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511
>>> > 3  1
>>> > 77 20  0
>>> >  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981
>>> > 2  1
>>> > 75 22  0
>>> >  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436
>>> > 2  1
>>> > 81 16  0
>>> >  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225
>>> > 2  1
>>> > 77 19  0
>>> >  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602
>>> > 2  1
>>> > 80 18  0
>>> >  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158
>>> > 2  1
>>> > 79 18  0
>>> >  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489
>>> > 1  1
>>> > 82 16  0
>>> >  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459
>>> > 2  1
>>> > 84 13  0
>>> >  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068
>>> > 2  1
>>> > 76 21  0
>>> >
>>> >
>>> > But the 16 cores are hardly utilized. Which indicates to me there is
>>> > some
>>> > bad thread thrashing, but why?
>>> >
>>> >
>>> >
>>> >   1  [|||||                                               8.3%]
>>> > Tasks:
>>> > 1070 total, 1 running
>>> >   2  [                                                    0.0%]
>>> > Load
>>> > average: 8.34 9.05 8.82
>>> >   3  [                                                    0.0%]
>>> > Uptime:
>>> > 192 days(!), 15:29:52
>>> >   4  [|||||||||||                                        17.9%]
>>> >   5  [|||||                                               5.7%]
>>> >   6  [||                                                  1.3%]
>>> >   7  [||                                                  2.6%]
>>> >   8  [|                                                   0.6%]
>>> >   9  [|                                                   0.6%]
>>> >   10 [||                                                  1.9%]
>>> >   11 [||                                                  1.9%]
>>> >   12 [||                                                  1.9%]
>>> >   13 [||                                                  1.3%]
>>> >   14 [|                                                   0.6%]
>>> >   15 [||                                                  1.3%]
>>> >   16 [|                                                   0.6%]
>>> >   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
>>> >   Swp[|                                               6/1983MB]
>>> >
>>> >   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
>>> > 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
>>> > 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> > 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
>>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>>> >
>>
>
>



-- 
Jonathan Ellis
Project Chair, Apache Cassandra
co-founder of Riptano, the source for professional Cassandra support
http://riptano.com

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Mike Malone <mi...@simplegeo.com>.
This may be your problem:
https://issues.apache.org/jira/browse/CASSANDRA-1358

The message deserializer executor is being created with a core pool size of
1. Since it uses a queue with unbounded capacity, new requests are always
queued and the thread pool never grows. So the message deserializer becomes
a single-threaded bottleneck through which all traffic must pass. So your 16
cores are reduced to one core for handling all inter-node communication (and
any intra-node communication that's being passed through the messaging
service).
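
That is just how java.util.concurrent.ThreadPoolExecutor behaves: it only adds
threads beyond corePoolSize when the work queue rejects an offer, and an
unbounded LinkedBlockingQueue never rejects. A quick self-contained demo
(plain JDK, not the Cassandra code):

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class CorePoolSizeDemo {
    public static void main(String[] args) throws Exception {
        // corePoolSize = 1, maximumPoolSize = 16, but an unbounded queue:
        // the pool never actually grows past one thread.
        ThreadPoolExecutor executor = new ThreadPoolExecutor(
                1, 16, 60, TimeUnit.SECONDS, new LinkedBlockingQueue<>());

        for (int i = 0; i < 10_000; i++) {
            executor.execute(() -> {
                try { Thread.sleep(1); } catch (InterruptedException e) { }
            });
        }
        Thread.sleep(200);
        System.out.println("threads in pool: " + executor.getPoolSize());     // stays at 1
        System.out.println("tasks pending:   " + executor.getQueue().size()); // thousands
        executor.shutdownNow();
    }
}

So either the queue needs a bound (so the pool can grow), or the deserialization
work has to move off that single thread, which is roughly what the ticket above
discusses.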

Mike

On Tue, Aug 3, 2010 at 10:02 PM, Dathan Pattishall <da...@gmail.com> wrote:

> The output of htop shows threads as procs with a breakdown of how much cpu
> /etc per thread (in ncurses color!). All of these Java "procs" are just Java
> threads of only 1 instance of Cassandra per Server.
>
>
> On Sat, Jul 31, 2010 at 3:45 PM, Benjamin Black <b...@b3k.us> wrote:
>
>> Sorry, I just noticed: are you running 14 instances of Cassandra on a
>> single physical machine or are all those java processes something
>> else?
>>
>> On Mon, Jul 26, 2010 at 12:22 PM, Dathan Pattishall <da...@gmail.com>
>> wrote:
>> > I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7
>> cores,
>> > RAID Disks).
>> >
>> > ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
>> > Pool Name                    Active   Pending      Completed
>> > STREAM-STAGE                      0         0              0
>> > RESPONSE-STAGE                    0         0         516280
>> > ROW-READ-STAGE                    8      4096        1164326
>> > LB-OPERATIONS                     0         0              0
>> > MESSAGE-DESERIALIZER-POOL         1    682008        1818682
>> > GMFD                              0         0           6467
>> > LB-TARGET                         0         0              0
>> > CONSISTENCY-MANAGER               0         0         661477
>> > ROW-MUTATION-STAGE                0         0         998780
>> > MESSAGE-STREAMING-POOL            0         0              0
>> > LOAD-BALANCER-STAGE               0         0              0
>> > FLUSH-SORTER-POOL                 0         0              0
>> > MEMTABLE-POST-FLUSHER             0         0              4
>> > FLUSH-WRITER-POOL                 0         0              4
>> > AE-SERVICE-STAGE                  0         0              0
>> > HINTED-HANDOFF-POOL               0         0              3
>> >
>> > EQX root@cass04:~# vmstat -n 1
>> >
>> > procs -----------memory---------- ---swap-- -----io---- --system--
>> > -----cpu------
>> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy
>> id
>> > wa st
>> >  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5
>> 1
>> > 94  0  0
>> >  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2
>> 1
>> > 79 18  0
>> >  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2
>> 1
>> > 81 16  0
>> >  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2
>> 1
>> > 80 17  0
>> >  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2
>> 1
>> > 83 14  0
>> >  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3
>> 1
>> > 77 20  0
>> >  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2
>> 1
>> > 75 22  0
>> >  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2
>> 1
>> > 81 16  0
>> >  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225
>> 2  1
>> > 77 19  0
>> >  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2
>> 1
>> > 80 18  0
>> >  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2
>> 1
>> > 79 18  0
>> >  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1
>> 1
>> > 82 16  0
>> >  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2
>> 1
>> > 84 13  0
>> >  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068
>> 2  1
>> > 76 21  0
>> >
>> >
>> > But the 16 cores are hardly utilized. Which indicates to me there is
>> some
>> > bad thread thrashing, but why?
>> >
>> >
>> >
>> >   1  [|||||                                               8.3%]
>> Tasks:
>> > 1070 total, 1 running
>> >   2  [                                                    0.0%]     Load
>> > average: 8.34 9.05 8.82
>> >   3  [                                                    0.0%]
>> Uptime:
>> > 192 days(!), 15:29:52
>> >   4  [|||||||||||                                        17.9%]
>> >   5  [|||||                                               5.7%]
>> >   6  [||                                                  1.3%]
>> >   7  [||                                                  2.6%]
>> >   8  [|                                                   0.6%]
>> >   9  [|                                                   0.6%]
>> >   10 [||                                                  1.9%]
>> >   11 [||                                                  1.9%]
>> >   12 [||                                                  1.9%]
>> >   13 [||                                                  1.3%]
>> >   14 [|                                                   0.6%]
>> >   15 [||                                                  1.3%]
>> >   16 [|                                                   0.6%]
>> >   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
>> >   Swp[|                                               6/1983MB]
>> >
>> >   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
>> > 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
>> > 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> > 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
>> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> >
>>
>
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
The output of htop shows threads as procs, with a breakdown of how much CPU
etc. per thread (in ncurses color!). All of these Java "procs" are just
threads of a single Cassandra instance per server.

On Sat, Jul 31, 2010 at 3:45 PM, Benjamin Black <b...@b3k.us> wrote:

> Sorry, I just noticed: are you running 14 instances of Cassandra on a
> single physical machine or are all those java processes something
> else?
>
> On Mon, Jul 26, 2010 at 12:22 PM, Dathan Pattishall <da...@gmail.com>
> wrote:
> > I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7
> cores,
> > RAID Disks).
> >
> > ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
> > Pool Name                    Active   Pending      Completed
> > STREAM-STAGE                      0         0              0
> > RESPONSE-STAGE                    0         0         516280
> > ROW-READ-STAGE                    8      4096        1164326
> > LB-OPERATIONS                     0         0              0
> > MESSAGE-DESERIALIZER-POOL         1    682008        1818682
> > GMFD                              0         0           6467
> > LB-TARGET                         0         0              0
> > CONSISTENCY-MANAGER               0         0         661477
> > ROW-MUTATION-STAGE                0         0         998780
> > MESSAGE-STREAMING-POOL            0         0              0
> > LOAD-BALANCER-STAGE               0         0              0
> > FLUSH-SORTER-POOL                 0         0              0
> > MEMTABLE-POST-FLUSHER             0         0              4
> > FLUSH-WRITER-POOL                 0         0              4
> > AE-SERVICE-STAGE                  0         0              0
> > HINTED-HANDOFF-POOL               0         0              3
> >
> > EQX root@cass04:~# vmstat -n 1
> >
> > procs -----------memory---------- ---swap-- -----io---- --system--
> > -----cpu------
> >  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy
> id
> > wa st
> >  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5  1
> > 94  0  0
> >  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2
> 1
> > 79 18  0
> >  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2
> 1
> > 81 16  0
> >  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2
> 1
> > 80 17  0
> >  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2
> 1
> > 83 14  0
> >  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3
> 1
> > 77 20  0
> >  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2
> 1
> > 75 22  0
> >  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2
> 1
> > 81 16  0
> >  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225  2
> 1
> > 77 19  0
> >  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2
> 1
> > 80 18  0
> >  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2
> 1
> > 79 18  0
> >  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1
> 1
> > 82 16  0
> >  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2
> 1
> > 84 13  0
> >  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068  2
> 1
> > 76 21  0
> >
> >
> > But the 16 cores are hardly utilized. Which indicates to me there is some
> > bad thread thrashing, but why?
> >
> >
> >
> >   1  [|||||                                               8.3%]
> Tasks:
> > 1070 total, 1 running
> >   2  [                                                    0.0%]     Load
> > average: 8.34 9.05 8.82
> >   3  [                                                    0.0%]
> Uptime:
> > 192 days(!), 15:29:52
> >   4  [|||||||||||                                        17.9%]
> >   5  [|||||                                               5.7%]
> >   6  [||                                                  1.3%]
> >   7  [||                                                  2.6%]
> >   8  [|                                                   0.6%]
> >   9  [|                                                   0.6%]
> >   10 [||                                                  1.9%]
> >   11 [||                                                  1.9%]
> >   12 [||                                                  1.9%]
> >   13 [||                                                  1.3%]
> >   14 [|                                                   0.6%]
> >   15 [||                                                  1.3%]
> >   16 [|                                                   0.6%]
> >   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
> >   Swp[|                                               6/1983MB]
> >
> >   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
> > 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
> > 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> > 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
> > /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Benjamin Black <b...@b3k.us>.
Sorry, I just noticed: are you running 14 instances of Cassandra on a
single physical machine or are all those java processes something
else?

On Mon, Jul 26, 2010 at 12:22 PM, Dathan Pattishall <da...@gmail.com> wrote:
> I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7 cores,
> RAID Disks).
>
> ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
> Pool Name                    Active   Pending      Completed
> STREAM-STAGE                      0         0              0
> RESPONSE-STAGE                    0         0         516280
> ROW-READ-STAGE                    8      4096        1164326
> LB-OPERATIONS                     0         0              0
> MESSAGE-DESERIALIZER-POOL         1    682008        1818682
> GMFD                              0         0           6467
> LB-TARGET                         0         0              0
> CONSISTENCY-MANAGER               0         0         661477
> ROW-MUTATION-STAGE                0         0         998780
> MESSAGE-STREAMING-POOL            0         0              0
> LOAD-BALANCER-STAGE               0         0              0
> FLUSH-SORTER-POOL                 0         0              0
> MEMTABLE-POST-FLUSHER             0         0              4
> FLUSH-WRITER-POOL                 0         0              4
> AE-SERVICE-STAGE                  0         0              0
> HINTED-HANDOFF-POOL               0         0              3
>
> EQX root@cass04:~# vmstat -n 1
>
> procs -----------memory---------- ---swap-- -----io---- --system--
> -----cpu------
>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id
> wa st
>  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5  1
> 94  0  0
>  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2  1
> 79 18  0
>  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2  1
> 81 16  0
>  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2  1
> 80 17  0
>  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2  1
> 83 14  0
>  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3  1
> 77 20  0
>  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2  1
> 75 22  0
>  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2  1
> 81 16  0
>  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225  2  1
> 77 19  0
>  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2  1
> 80 18  0
>  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2  1
> 79 18  0
>  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1  1
> 82 16  0
>  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2  1
> 84 13  0
>  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068  2  1
> 76 21  0
>
>
> But the 16 cores are hardly utilized. Which indicates to me there is some
> bad thread thrashing, but why?
>
>
>
>   1  [|||||                                               8.3%]     Tasks:
> 1070 total, 1 running
>   2  [                                                    0.0%]     Load
> average: 8.34 9.05 8.82
>   3  [                                                    0.0%]     Uptime:
> 192 days(!), 15:29:52
>   4  [|||||||||||                                        17.9%]
>   5  [|||||                                               5.7%]
>   6  [||                                                  1.3%]
>   7  [||                                                  2.6%]
>   8  [|                                                   0.6%]
>   9  [|                                                   0.6%]
>   10 [||                                                  1.9%]
>   11 [||                                                  1.9%]
>   12 [||                                                  1.9%]
>   13 [||                                                  1.3%]
>   14 [|                                                   0.6%]
>   15 [||                                                  1.3%]
>   16 [|                                                   0.6%]
>   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
>   Swp[|                                               6/1983MB]
>
>   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
> 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
> 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Chris Goffinet <cg...@chrisgoffinet.com>.
When you can't get the number of threads out of JMX, it usually means you have way too many running (8,000+).

Try running `ps -eLf | grep cassandra`. How many threads?
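
For example, something like this gives a rough count of the java LWPs (the
brackets keep the grep from matching itself):

ps -eLf | grep '[c]assandra' | wc -l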

-Chris

On Jul 29, 2010, at 8:40 PM, Dathan Pattishall wrote:

> 
> To Follow up on this thread. I blew away the data for my entire cluster, waited a few days of user activity and within 3 days the server hangs requests in the same way.
> 
> 
> Background Info: 
> Make around 60 million requests per day.
> 70% reads
> 30% writes
> an F5 Loadbalancer (BIGIP-LTM) in a round robin config.
> 
> 
> 
> 
> IOSTAT Info:
> > 3 MB a second of writing data @ 13% IOWAIT
> 
> VMStat Info:
> > still shows a lot of blocked procs at low CPU utilization.
> 
> Data Size:
> > 6 GB of data per node, and there are 4 nodes
> 
> cass01: Pool Name                    Active   Pending      Completed
> cass01: FILEUTILS-DELETE-POOL             0         0             27
> cass01: STREAM-STAGE                      0         0              8
> cass01: RESPONSE-STAGE                    0         0       66439845
> cass01: ROW-READ-STAGE                    8      4098       77243463
> cass01: LB-OPERATIONS                     0         0              0
> cass01: MESSAGE-DESERIALIZER-POOL         1  14223148      139627123
> cass01: GMFD                              0         0         772032
> cass01: LB-TARGET                         0         0              0
> cass01: CONSISTENCY-MANAGER               0         0       35518593
> cass01: ROW-MUTATION-STAGE                0         0       19809347
> cass01: MESSAGE-STREAMING-POOL            0         0             24
> cass01: LOAD-BALANCER-STAGE               0         0              0
> cass01: FLUSH-SORTER-POOL                 0         0              0
> cass01: MEMTABLE-POST-FLUSHER             0         0             74
> cass01: FLUSH-WRITER-POOL                 0         0             74
> cass01: AE-SERVICE-STAGE                  0         0              0
> cass01: HINTED-HANDOFF-POOL               0         0              9
> 
> 
> 
> Keyspace: TimeFrameClicks
>         Read Count: 42686
>         Read Latency: 47.21777100220213 ms.
>         Write Count: 18398
>         Write Latency: 0.17457457332318732 ms.
>         Pending Tasks: 0
>                 Column Family: Standard2
>                 SSTable count: 9
>                 Space used (live): 6561033040
>                 Space used (total): 6561033040
>                 Memtable Columns Count: 6711
>                 Memtable Data Size: 241596
>                 Memtable Switch Count: 1
>                 Read Count: 42552
>                 Read Latency: 41.851 ms.
>                 Write Count: 18398
>                 Write Latency: 0.031 ms.
>                 Pending Tasks: 0
>                 Key cache capacity: 200000
>                 Key cache size: 81499
>                 Key cache hit rate: 0.2495154675604193
>                 Row cache: disabled
>                 Compacted row minimum size: 0
>                 Compacted row maximum size: 0
>                 Compacted row mean size: 0
> 
> 
> Attached is jconsole memory use.
> I would attach the thread use but I could not get any info from JMX on the threads. And clicking detect deadlock just hangs, I do not see the expected No deadlock detected.
> 
> 
> > Based on feedback from this list by jbellis, I'm hitting Cassandra too hard. So I removed the offending server from the LB. Waited about 20 mins and the pending queue did not clear at all.
> 
> Killing Cassandra and restarting it, this box recovered.
> 
> 
> 
> 
> > So from my point of view I think there is a bug in Cassandra. Do you agree? Possibly a deadlock in the SEDA implementation of the ROW-READ-STAGE?
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> On Tue, Jul 27, 2010 at 12:28 AM, Peter Schuller <pe...@infidyne.com> wrote:
> > average queue size column too. But given the vmstat output I doubt
> > this is the case since you should either be seeing a lot more wait
> > time or a lot less idle time.
> 
> Hmm, another thing: you mention 16 i7 cores. I presume that's 16 in
> total, counting hyper-threading? Because that means 8 threads should
> be able to saturate 50% (as perceived by the operating system). If you
> have 32 (can you get this yet anyway?) virtual cores then I'd say that
> > your vmstat output could be consistent with ROW-READ-STAGE being CPU
> bound rather than disk bound (presumably with data fitting in cache
> and not having to go down to disk). If this is the case, increasing
> read concurrency should at least make the actual problem more obvious
> (i.e., achieving CPU saturation), though it probably won't increase
> throughput much unless Cassandra is very friendly to
> hyperthreading....
> 
> --
> / Peter Schuller
> 
> <memory_use.PNG>


Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Peter Schuller <pe...@infidyne.com>.
> Latency is fine, basically the service suddenly freezes. On top of that to
> reduce the number of reads I have memcache fronting this @ a 92% hit rate

Ok. In that case it feels most likely to me that you're not throwing
too much read traffic at it consistently, but rather that there is
either

  (a) some gradual accumulation of e.g. stuck threads in the read
stage or similar, or
  (b) something happening more suddenly that is directly causing your
problem (e.g., sudden extreme spike in traffic triggering some bug,
touching some particular row that goes to an unreadable part of disk,
etc).

> I have very detailed stats on the exceptions thrown from the cassandra
> client. For about 3-5 days I have a 99% success ratio with connections +
> service of pulling a single hash key with a single column.
>
> i.e. {$sn}_{$userid}_{$click} => {$when}
>
> then I have about a 25-40% failure rate when the hang occurs.

Are the 1% failures you normally experience (pre-freeze) timeouts?

-- 
/ Peter Schuller

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
On Fri, Jul 30, 2010 at 1:05 AM, Peter Schuller <peter.schuller@infidyne.com
> wrote:

> > Keyspace: TimeFrameClicks
> >         Read Count: 42686
> >         Read Latency: 47.21777100220213 ms.
> >         Write Count: 18398
> >         Write Latency: 0.17457457332318732 ms.
>
> Is this all traffic across "a few days" as you mentioned the node had
> been running?
>

Sorry, that is a cut and paste from after the restart; the pattern remains the
same. I just did a full cluster restart to stop the Transport and Timeout
exceptions, and the app recovered.
When it happens again I'll send you the info from prior to the restart.



> Also: If you're hitting it too hard such that you have a cumulative
> effect over time until you notice it, I'd expect latencies to become
> worse and worse over time (and obviously so); you wouldn't be all fine
> and then suddenly freeze.
>

Latency is fine, basically the service suddenly freezes. On top of that to
reduce the number of reads I have memcache fronting this @ a 92% hit rate



> How much do you know about individual request success rate? is it
> possible that some subset of requests never finish at all (does your
> application log timeouts?), while others proceed without difficulty.
> That might lead to an overall build-up of pending jobs even though
> most requests make it through. Then once you eventually hit a queue
> size limit/concurrency limit you may experience a "sudden" hang.
>
>
I have very detailed stats on the exceptions thrown from the cassandra
client. For about 3-5 days I have a 99% success ratio with connections +
service of pulling a single hash key with a single column.

i.e. {$sn}_{$userid}_{$click} => {$when}

then I have about a 25-40% failure rate when the hang occurs.



> I'm still wondering what the threads are actually doing in the
> ROW-READ-STAGE.
>
> Are you blocking forever trying to read from disk for example? Such
> things can happen with hardware issues or OS issues; some particular
> part of a file may be unreadable. In such a case you'd expect queries
> for certain columns or ranges of column to hang.
>
> I think it might be useful to observe the stages stats immediately on
> restart. Keep an eye out for how many concurrent requests there are in
> the ROW-READ-STAGE. Based on the numbers you've posted I'd suspect
> that the concurrency will usually be 0. In any case you should see it
> fluctuate, and hopefully it will be visible when the fluctuations
> suddenly displace by one thread (so you go from fluctuating between
> e.g. 0-3 threads to between 1-4 threads never going below 1). This
> should indicate you have a hung thread, and it may be worth attaching
> with jconsole to see what that thread is blocking on, before so much
> time has passed that you have thousands of threads waiting.
>
> If it *is* a matter of parts of files being unreadable it may be
> easier to spot using standard I/O rather than mmap():ed I/O since you
> should clearly see it blocking on a read in that case.
>
Interesting, I will try this. Thanks Peter!



> --
> / Peter Schuller
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Peter Schuller <pe...@infidyne.com>.
> Keyspace: TimeFrameClicks
>         Read Count: 42686
>         Read Latency: 47.21777100220213 ms.
>         Write Count: 18398
>         Write Latency: 0.17457457332318732 ms.

Is this all traffic across "a few days" as you mentioned the node had
been running?

> Based on Feedback from this list by jbellis, I'm hitting cassandra to hard.
> So I removed the offending server from the LB. Waited about 20 mins and the
> pending queue did not clear at all.
>
> Killing Cassandra and restarting it, this box recovered.

If those 42k reads represent all reads done over a several-day period,
it seems unlikely that you're really hitting it too hard. Or are
individual reads really expensive? (If you already posted that info,
sorry)

Also: If you're hitting it too hard such that you have a cumulative
effect over time until you notice it, I'd expect latencies to become
worse and worse over time (and obviously so); you wouldn't be all fine
and then suddenly freeze.

How much do you know about individual request success rate? Is it
possible that some subset of requests never finish at all (does your
application log timeouts?), while others proceed without difficulty.
That might lead to an overall build-up of pending jobs even though
most requests make it through. Then once you eventually hit a queue
size limit/concurrency limit you may experience a "sudden" hang.

I'm still wondering what the threads are actually doing in the ROW-READ-STAGE.

Are you blocking forever trying to read from disk for example? Such
things can happen with hardware issues or OS issues; some particular
part of a file may be unreadable. In such a case you'd expect queries
for certain columns or ranges of columns to hang.
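
One crude way to rule that out is to read every data file straight through
and then check the kernel log for I/O errors; the path below is just an
example, so point it at wherever your data files actually live (e.g. the
TimeFrameClicks keyspace directory):

for f in /var/lib/cassandra/data/TimeFrameClicks/*; do
  # dd exits non-zero if any part of the file cannot be read
  dd if="$f" of=/dev/null bs=1M 2>/dev/null || echo "read failed: $f"
done
dmesg | tail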

I think it might be useful to observe the stages stats immediately on
restart. Keep an eye out for how many concurrent requests there are in
the ROW-READ-STAGE. Based on the numbers you've posted I'd suspect
that the concurrency will usually be 0. In any case you should see it
fluctuate, and hopefully it will be visible when the fluctuations
suddenly displace by one thread (so you go from fluctuating between
e.g. 0-3 threads to between 1-4 threads never going below 1). This
should indicate you have a hung thread, and it may be worth attaching
with jconsole to see what that thread is blocking on, before so much
time has passed that you have thousands of threads waiting.
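
For example, a crude way to watch that right after a restart, reusing the
nodetool invocation from earlier in the thread:

while true; do /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats | grep ROW-READ-STAGE; sleep 5; done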

If it *is* a matter of parts of files being unreadable it may be
easier to spot using standard I/O rather than mmap():ed I/O since you
should clearly see it blocking on a read in that case.
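
If I remember right, the knob for that is DiskAccessMode in storage-conf.xml
(please double-check against your config); assuming the config sits under
/opt/cassandra/conf on your install, a quick look:

grep -i diskaccessmode /opt/cassandra/conf/storage-conf.xml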

-- 
/ Peter Schuller

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
To follow up on this thread: I blew away the data for my entire cluster,
waited a few days of user activity, and within 3 days the server hangs
requests in the same way.


Background Info:
Make around 60 million requests per day.
70% reads
30% writes
an F5 Loadbalancer (BIGIP-LTM) in a round robin config.




IOSTAT Info:
3 MB a second of writing data @ 13% IOWAIT

VMStat Info:
still shows a lot of blocked procs at low CPU utilization.

Data Size:
6 GB of data per node, and there are 4 nodes

cass01: Pool Name                    Active   Pending      Completed
cass01: FILEUTILS-DELETE-POOL             0         0             27
cass01: STREAM-STAGE                      0         0              8
cass01: RESPONSE-STAGE                    0         0       66439845
*cass01: ROW-READ-STAGE                    8      4098       77243463*
cass01: LB-OPERATIONS                     0         0              0
*cass01: MESSAGE-DESERIALIZER-POOL         1  14223148      139627123*
cass01: GMFD                              0         0         772032
cass01: LB-TARGET                         0         0              0
cass01: CONSISTENCY-MANAGER               0         0       35518593
cass01: ROW-MUTATION-STAGE                0         0       19809347
cass01: MESSAGE-STREAMING-POOL            0         0             24
cass01: LOAD-BALANCER-STAGE               0         0              0
cass01: FLUSH-SORTER-POOL                 0         0              0
cass01: MEMTABLE-POST-FLUSHER             0         0             74
cass01: FLUSH-WRITER-POOL                 0         0             74
cass01: AE-SERVICE-STAGE                  0         0              0
cass01: HINTED-HANDOFF-POOL               0         0              9



Keyspace: TimeFrameClicks
        Read Count: 42686
        Read Latency: 47.21777100220213 ms.
        Write Count: 18398
        Write Latency: 0.17457457332318732 ms.
        Pending Tasks: 0
                Column Family: Standard2
                SSTable count: 9
                Space used (live): 6561033040
                Space used (total): 6561033040
                Memtable Columns Count: 6711
                Memtable Data Size: 241596
                Memtable Switch Count: 1
                Read Count: 42552
                Read Latency: 41.851 ms.
                Write Count: 18398
                Write Latency: 0.031 ms.
                Pending Tasks: 0
                Key cache capacity: 200000
                Key cache size: 81499
                Key cache hit rate: 0.2495154675604193
                Row cache: disabled
                Compacted row minimum size: 0
                Compacted row maximum size: 0
                Compacted row mean size: 0


Attached is jconsole memory use.
I would attach the thread usage, but I could not get any info from JMX on the
threads. And clicking detect deadlock just hangs; I never see the expected
"No deadlock detected" message.


Based on feedback from this list by jbellis, I'm hitting Cassandra too hard.
So I removed the offending server from the LB. Waited about 20 mins and the
pending queue did not clear at all.

After killing Cassandra and restarting it, this box recovered.




So from my point of view I think there is a bug in Cassandra. Do you agree?
Possibly a deadlock in the SEDA implementation of the ROW-READ-STAGE?










On Tue, Jul 27, 2010 at 12:28 AM, Peter Schuller <
peter.schuller@infidyne.com> wrote:

> > average queue size column too. But given the vmstat output I doubt
> > this is the case since you should either be seeing a lot more wait
> > time or a lot less idle time.
>
> Hmm, another thing: you mention 16 i7 cores. I presume that's 16 in
> total, counting hyper-threading? Because that means 8 threads should
> be able to saturate 50% (as perceived by the operating system). If you
> have 32 (can you get this yet anyway?) virtual cores then I'd say that
> your vmstat output could be consistent with ROW-READ-STAGE being CPU
> bound rather than disk bound (presumably with data fitting in cache
> and not having to go down to disk). If this is the case, increasing
> read concurrency should at least make the actual problem more obvious
> (i.e., achieving CPU saturation), though it probably won't increase
> throughput much unless Cassandra is very friendly to
> hyperthreading....
>
> --
> / Peter Schuller
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Peter Schuller <pe...@infidyne.com>.
> average queue size column too. But given the vmstat output I doubt
> this is the case since you should either be seeing a lot more wait
> time or a lot less idle time.

Hmm, another thing: you mention 16 i7 cores. I presume that's 16 in
total, counting hyper-threading? Because that means 8 threads should
be able to saturate 50% (as perceived by the operating system). If you
have 32 (can you get this yet anyway?) virtual cores then I'd say that
your vmstat output could be consistent with ROW-READ-STAGE being CPU
bound rather than disk bound (presumably with data fitting in cache
and not having to go down to disk). If this is the case, increasing
read concurrency should at least make the actual problem more obvious
(i.e., achieving CPU saturation), though it probably won't increase
throughput much unless Cassandra is very friendly to
hyperthreading....
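
If you do experiment with that, the read concurrency setting is (if I recall
correctly) ConcurrentReads in storage-conf.xml; assuming the config lives
under /opt/cassandra/conf, you can check the current value with:

grep -i concurrentreads /opt/cassandra/conf/storage-conf.xml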

-- 
/ Peter Schuller

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Peter Schuller <pe...@infidyne.com>.
> Ah, the weird thing is I/O is assumed to be the limiting factor, but iops on
> the box was very low. Service time and atime very low, and the data access
> was only 6MB a second. With all of this, I'm tending to believe that the
> problem may be someplace else.

Your vmstat output shows idle and wait time. I suspect an "iostat -x 1"
will show that you're not keeping your underlying device busy (the
right-most column not being stuck at 100% or close to it). Is this the
case? If it is close to 100% or at 100% you'd want to look at the
average queue size column too. But given the vmstat output I doubt
this is the case since you should either be seeing a lot more wait
time or a lot less idle time.
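
Something along these lines, for instance, watching the device that actually
holds the Cassandra data files:

# %util is the right-most column; avgqu-sz is the average queue size
iostat -x 1 5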

The question is what the limiting factor is. What does jconsole/etc
say about the state of the threads in ROW-READ-STAGE? Statistically, if
you poll them a few times, what do they mostly seem to be waiting on?
Normally, if the expectation is that ROW-READ-STAGE is disk bound,
that should show up by the threads usually being busy waiting for disk
I/O upon repeated polling (or waiting for more work to do if they are
idle).
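
For example (where $PID is the Cassandra JVM's process id, and assuming the
worker threads are named after the stage, the same way it shows up in
tpstats):

for i in 1 2 3; do jstack $PID | grep -A 12 'ROW-READ-STAGE'; sleep 10; done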

(And btw for some reason I totally missed the fact that ROW-READ-STAGE
had 4096 pending and 8 active... oh well.)

-- 
/ Peter Schuller

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
Ah, the weird thing is that I/O is assumed to be the limiting factor, but iops
on the box were very low. Service time and atime were very low, and the data
access was only 6MB a second. With all of this, I'm tending to believe that
the problem may be someplace else.

Maybe there is a preferred Java version for Cassandra 0.6.3? I am not
running the latest 1.6 in production.
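
For reference, the exact build the daemon runs on can be checked with the
same java binary that shows up in the htop output:

/opt/java/bin/java -version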


On Tue, Jul 27, 2010 at 12:01 AM, Thorvaldsson Justus <
justus.thorvaldsson@svenskaspel.se> wrote:

>  AFAIK You could use more nodes and read in parallel from them making your
> read rate go up. Also don’t write and read to the same disk may help some.
> It’s not so much about “Cassandra’s” read rate but what your hardware can
> manage.
>
>
>
> /Justus
>
>
>
> *From:* Dathan Pattishall [mailto:dathanvp@gmail.com]
> *Sent:* 27 July 2010 08:56
> *To:* user@cassandra.apache.org
> *Subject:* Re: what causes MESSAGE-DESERIALIZER-POOL to spike
>
>
>
>
>
> On Mon, Jul 26, 2010 at 8:30 PM, Jonathan Ellis <jb...@gmail.com> wrote:
>
> MDP is backing up because RRS is full at 4096.  This means you're not
> able to process reads as quickly as the requests are coming in.  Make
> whatever is doing those reads be less aggressive.
>
>
> So, for cassandra to function correctly I need to throttle my reads? What
> request rate is ideal? 100s reads a second, 1000s? For me I would love to do
> 100s of thousands of reads a second. Is Cassandra not suited for this?
>
>
>
> As to why the reads are slow in the first place, usually this means
> you are disk i/o bound.  Posting your cfstats can help troubleshoot
> but is no substitute for thinking about your application workload.
>
>
> How should I think about my application workload? I use cassandra as a
> distributed hash table accessing Cassandra by individual keys (BigO(1)). I
> randomly hit a node through a F5 loadbalancer, using the storage CF
> definition as defined in the sample storage-conf.xml. Each key is no more
> than 30 bytes, the value is a time stamp. I store a total of 20 million
> keys and update 1.5 million keys a day. Is there anything else I should
> really think about? What are the limitations in Cassandra that would affect
> this workload?
>
>
>
>
>
>
>
> On Mon, Jul 26, 2010 at 12:32 PM, Anthony Molinaro
> <an...@alumni.caltech.edu> wrote:
> > It's usually I/O which causes backup in MESSAGE-DESERIALIZER-POOL.  You
> > should check iostat and see what it looks like.  It may be that you
> > need more nodes in order to deal with the read/write rate.   You can also
> > use JMX to get latency values on reads and writes and see if the backup
> > has a corresponding increase in latency.  You may be able to get more
> > out of your hardware and memory with row caching but that really depends
> > on your data set.
> >
> > -Anthony
> >
> > On Mon, Jul 26, 2010 at 12:22:46PM -0700, Dathan Pattishall wrote:
> >> I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7
> cores,
> >> RAID Disks).
> >>
> >> ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
> >> Pool Name                    Active   Pending      Completed
> >> STREAM-STAGE                      0         0              0
> >> RESPONSE-STAGE                    0         0         516280
> >> ROW-READ-STAGE                    8      4096        1164326
> >> LB-OPERATIONS                     0         0              0
> >> *MESSAGE-DESERIALIZER-POOL         1    682008        1818682*
> >> GMFD                              0         0           6467
> >> LB-TARGET                         0         0              0
> >> CONSISTENCY-MANAGER               0         0         661477
> >> ROW-MUTATION-STAGE                0         0         998780
> >> MESSAGE-STREAMING-POOL            0         0              0
> >> LOAD-BALANCER-STAGE               0         0              0
> >> FLUSH-SORTER-POOL                 0         0              0
> >> MEMTABLE-POST-FLUSHER             0         0              4
> >> FLUSH-WRITER-POOL                 0         0              4
> >> AE-SERVICE-STAGE                  0         0              0
> >> HINTED-HANDOFF-POOL               0         0              3
> >>
> >> EQX root@cass04:~# vmstat -n 1
> >>
> >> procs -----------memory---------- ---swap-- -----io---- --system--
> >> -----cpu------
> >>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy
> id
> >> wa st
> >>  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5
>  1
> >> 94  0  0
> >>  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2
>  1
> >> 79 18  0
> >>  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2
>  1
> >> 81 16  0
> >>  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2
>  1
> >> 80 17  0
> >>  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2
>  1
> >> 83 14  0
> >>  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3
>  1
> >> 77 20  0
> >>  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2
>  1
> >> 75 22  0
> >>  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2
>  1
> >> 81 16  0
> >>  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225  2
>  1
> >> 77 19  0
> >>  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2
>  1
> >> 80 18  0
> >>  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2
>  1
> >> 79 18  0
> >>  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1
>  1
> >> 82 16  0
> >>  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2
>  1
> >> 84 13  0
> >>  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068  2
>  1
> >> 76 21  0
> >>
> >>
> >> *But the 16 cores are hardly utilized. Which indicates to me there is
> some
> >> bad thread thrashing, but why? *
> >>
> >>
> >>
> >>   1  [|||||                                               8.3%]
> Tasks:
> >> 1070 total, 1 running
> >>   2  [                                                    0.0%]     Load
> >> average: 8.34 9.05 8.82
> >>   3  [                                                    0.0%]
> Uptime:
> >> 192 days(!), 15:29:52
> >>   4  [|||||||||||                                        17.9%]
> >>   5  [|||||                                               5.7%]
> >>   6  [||                                                  1.3%]
> >>   7  [||                                                  2.6%]
> >>   8  [|                                                   0.6%]
> >>   9  [|                                                   0.6%]
> >>   10 [||                                                  1.9%]
> >>   11 [||                                                  1.9%]
> >>   12 [||                                                  1.9%]
> >>   13 [||                                                  1.3%]
> >>   14 [|                                                   0.6%]
> >>   15 [||                                                  1.3%]
> >>   16 [|                                                   0.6%]
> >>   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
> >>   Swp[|                                               6/1983MB]
> >>
> >>   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
> >> 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
> >> 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >
> > --
> > ------------------------------------------------------------------------
> > Anthony Molinaro                           <an...@alumni.caltech.edu>
> >
>
>
>   --
> Jonathan Ellis
> Project Chair, Apache Cassandra
> co-founder of Riptano, the source for professional Cassandra support
> http://riptano.com
>
>
>

SV: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Thorvaldsson Justus <ju...@svenskaspel.se>.
AFAIK you could use more nodes and read from them in parallel, making your read rate go up. Also, not writing and reading to the same disk may help some. It’s not so much about “Cassandra’s” read rate but what your hardware can manage.
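
A quick way to see whether the commit log and the data files share a disk,
assuming the stock config location under /opt/cassandra/conf:

grep -i directory /opt/cassandra/conf/storage-conf.xml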

/Justus

From: Dathan Pattishall [mailto:dathanvp@gmail.com]
Sent: 27 July 2010 08:56
To: user@cassandra.apache.org
Subject: Re: what causes MESSAGE-DESERIALIZER-POOL to spike


On Mon, Jul 26, 2010 at 8:30 PM, Jonathan Ellis <jb...@gmail.com> wrote:
MDP is backing up because RRS is full at 4096.  This means you're not
able to process reads as quickly as the requests are coming in.  Make
whatever is doing those reads be less aggressive.

So, for cassandra to function correctly I need to throttle my reads? What request rate is ideal? 100s reads a second, 1000s? For me I would love to do 100s of thousands of reads a second. Is Cassandra not suited for this?


As to why the reads are slow in the first place, usually this means
you are disk i/o bound.  Posting your cfstats can help troubleshoot
but is no substitute for thinking about your application workload.

How should I think about my application workload? I use Cassandra as a distributed hash table, accessing Cassandra by individual keys (BigO(1)). I randomly hit a node through an F5 loadbalancer, using the storage CF definition as defined in the sample storage-conf.xml. Each key is no more than 30 bytes, the value is a time stamp. I store a total of 20 million keys and update 1.5 million keys a day. Is there anything else I should really think about? What are the limitations in Cassandra that would affect this workload?





On Mon, Jul 26, 2010 at 12:32 PM, Anthony Molinaro
<an...@alumni.caltech.edu> wrote:
> It's usually I/O which causes backup in MESSAGE-DESERIALIZER-POOL.  You
> should check iostat and see what it looks like.  It may be that you
> need more nodes in order to deal with the read/write rate.   You can also
> use JMX to get latency values on reads and writes and see if the backup
> has a corresponding increase in latency.  You may be able to get more
> out of your hardware and memory with row caching but that really depends
> on your data set.
>
> -Anthony
>
> On Mon, Jul 26, 2010 at 12:22:46PM -0700, Dathan Pattishall wrote:
>> I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7 cores,
>> RAID Disks).
>>
>> ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
>> Pool Name                    Active   Pending      Completed
>> STREAM-STAGE                      0         0              0
>> RESPONSE-STAGE                    0         0         516280
>> ROW-READ-STAGE                    8      4096        1164326
>> LB-OPERATIONS                     0         0              0
>> *MESSAGE-DESERIALIZER-POOL         1    682008        1818682*
>> GMFD                              0         0           6467
>> LB-TARGET                         0         0              0
>> CONSISTENCY-MANAGER               0         0         661477
>> ROW-MUTATION-STAGE                0         0         998780
>> MESSAGE-STREAMING-POOL            0         0              0
>> LOAD-BALANCER-STAGE               0         0              0
>> FLUSH-SORTER-POOL                 0         0              0
>> MEMTABLE-POST-FLUSHER             0         0              4
>> FLUSH-WRITER-POOL                 0         0              4
>> AE-SERVICE-STAGE                  0         0              0
>> HINTED-HANDOFF-POOL               0         0              3
>>
>> EQX root@cass04:~# vmstat -n 1
>>
>> procs -----------memory---------- ---swap-- -----io---- --system--
>> -----cpu------
>>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id
>> wa st
>>  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5  1
>> 94  0  0
>>  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2  1
>> 79 18  0
>>  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2  1
>> 81 16  0
>>  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2  1
>> 80 17  0
>>  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2  1
>> 83 14  0
>>  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3  1
>> 77 20  0
>>  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2  1
>> 75 22  0
>>  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2  1
>> 81 16  0
>>  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225  2  1
>> 77 19  0
>>  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2  1
>> 80 18  0
>>  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2  1
>> 79 18  0
>>  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1  1
>> 82 16  0
>>  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2  1
>> 84 13  0
>>  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068  2  1
>> 76 21  0
>>
>>
>> *But the 16 cores are hardly utilized. Which indicates to me there is some
>> bad thread thrashing, but why? *
>>
>>
>>
>>   1  [|||||                                               8.3%]     Tasks:
>> 1070 total, 1 running
>>   2  [                                                    0.0%]     Load
>> average: 8.34 9.05 8.82
>>   3  [                                                    0.0%]     Uptime:
>> 192 days(!), 15:29:52
>>   4  [|||||||||||                                        17.9%]
>>   5  [|||||                                               5.7%]
>>   6  [||                                                  1.3%]
>>   7  [||                                                  2.6%]
>>   8  [|                                                   0.6%]
>>   9  [|                                                   0.6%]
>>   10 [||                                                  1.9%]
>>   11 [||                                                  1.9%]
>>   12 [||                                                  1.9%]
>>   13 [||                                                  1.3%]
>>   14 [|                                                   0.6%]
>>   15 [||                                                  1.3%]
>>   16 [|                                                   0.6%]
>>   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
>>   Swp[|                                               6/1983MB]
>>
>>   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
>> 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
>> 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
>> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>
> --
> ------------------------------------------------------------------------
> Anthony Molinaro                           <an...@alumni.caltech.edu>
>


--
Jonathan Ellis
Project Chair, Apache Cassandra
co-founder of Riptano, the source for professional Cassandra support
http://riptano.com


Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Dathan Pattishall <da...@gmail.com>.
On Mon, Jul 26, 2010 at 8:30 PM, Jonathan Ellis <jb...@gmail.com> wrote:

> MDP is backing up because RRS is full at 4096.  This means you're not
> able to process reads as quickly as the requests are coming in.  Make
> whatever is doing those reads be less aggressive.
>
>
So, for Cassandra to function correctly I need to throttle my reads? What
request rate is ideal? 100s of reads a second? 1000s? For me I would love to
do 100s of thousands of reads a second. Is Cassandra not suited for this?



> As to why the reads are slow in the first place, usually this means
> you are disk i/o bound.  Posting your cfstats can help troubleshoot
> but is no substitute for thinking about your application workload.
>

How should I think about my application workload? I use Cassandra as a
distributed hash table, accessing Cassandra by individual keys (BigO(1)). I
randomly hit a node through an F5 loadbalancer, using the storage CF
definition as defined in the sample storage-conf.xml. Each key is no more
than 30 bytes, the value is a time stamp. I store a total of 20 million
keys and update 1.5 million keys a day. Is there anything else I should
really think about? What are the limitations in Cassandra that would affect
this workload?





>
> On Mon, Jul 26, 2010 at 12:32 PM, Anthony Molinaro
> <an...@alumni.caltech.edu> wrote:
> > It's usually I/O which causes backup in MESSAGE-DESERIALIZER-POOL.  You
> > should check iostat and see what it looks like.  It may be that you
> > need more nodes in order to deal with the read/write rate.   You can also
> > use JMX to get latency values on reads and writes and see if the backup
> > has a corresponding increase in latency.  You may be able to get more
> > out of your hardware and memory with row caching but that really depends
> > on your data set.
> >
> > -Anthony
> >
> > On Mon, Jul 26, 2010 at 12:22:46PM -0700, Dathan Pattishall wrote:
> >> I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7
> cores,
> >> RAID Disks).
> >>
> >> ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
> >> Pool Name                    Active   Pending      Completed
> >> STREAM-STAGE                      0         0              0
> >> RESPONSE-STAGE                    0         0         516280
> >> ROW-READ-STAGE                    8      4096        1164326
> >> LB-OPERATIONS                     0         0              0
> >> *MESSAGE-DESERIALIZER-POOL         1    682008        1818682*
> >> GMFD                              0         0           6467
> >> LB-TARGET                         0         0              0
> >> CONSISTENCY-MANAGER               0         0         661477
> >> ROW-MUTATION-STAGE                0         0         998780
> >> MESSAGE-STREAMING-POOL            0         0              0
> >> LOAD-BALANCER-STAGE               0         0              0
> >> FLUSH-SORTER-POOL                 0         0              0
> >> MEMTABLE-POST-FLUSHER             0         0              4
> >> FLUSH-WRITER-POOL                 0         0              4
> >> AE-SERVICE-STAGE                  0         0              0
> >> HINTED-HANDOFF-POOL               0         0              3
> >>
> >> EQX root@cass04:~# vmstat -n 1
> >>
> >> procs -----------memory---------- ---swap-- -----io---- --system--
> >> -----cpu------
> >>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy
> id
> >> wa st
> >>  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5
>  1
> >> 94  0  0
> >>  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2
>  1
> >> 79 18  0
> >>  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2
>  1
> >> 81 16  0
> >>  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2
>  1
> >> 80 17  0
> >>  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2
>  1
> >> 83 14  0
> >>  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3
>  1
> >> 77 20  0
> >>  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2
>  1
> >> 75 22  0
> >>  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2
>  1
> >> 81 16  0
> >>  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225  2
>  1
> >> 77 19  0
> >>  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2
>  1
> >> 80 18  0
> >>  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2
>  1
> >> 79 18  0
> >>  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1
>  1
> >> 82 16  0
> >>  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2
>  1
> >> 84 13  0
> >>  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068  2
>  1
> >> 76 21  0
> >>
> >>
> >> *But the 16 cores are hardly utilized. Which indicates to me there is
> some
> >> bad thread thrashing, but why? *
> >>
> >>
> >>
> >>   1  [|||||                                               8.3%]
> Tasks:
> >> 1070 total, 1 running
> >>   2  [                                                    0.0%]     Load
> >> average: 8.34 9.05 8.82
> >>   3  [                                                    0.0%]
> Uptime:
> >> 192 days(!), 15:29:52
> >>   4  [|||||||||||                                        17.9%]
> >>   5  [|||||                                               5.7%]
> >>   6  [||                                                  1.3%]
> >>   7  [||                                                  2.6%]
> >>   8  [|                                                   0.6%]
> >>   9  [|                                                   0.6%]
> >>   10 [||                                                  1.9%]
> >>   11 [||                                                  1.9%]
> >>   12 [||                                                  1.9%]
> >>   13 [||                                                  1.3%]
> >>   14 [|                                                   0.6%]
> >>   15 [||                                                  1.3%]
> >>   16 [|                                                   0.6%]
> >>   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
> >>   Swp[|                                               6/1983MB]
> >>
> >>   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
> >> 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
> >> 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >> 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10
> >> /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
> >
> > --
> > ------------------------------------------------------------------------
> > Anthony Molinaro                           <an...@alumni.caltech.edu>
> >
>
>
>
> --
> Jonathan Ellis
> Project Chair, Apache Cassandra
> co-founder of Riptano, the source for professional Cassandra support
> http://riptano.com
>

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Jonathan Ellis <jb...@gmail.com>.
MDP is backing up because RRS is full at 4096.  This means you're not
able to process reads as quickly as the requests are coming in.  Make
whatever is doing those reads be less aggressive.

As to why the reads are slow in the first place, usually this means
you are disk i/o bound.  Posting your cfstats can help troubleshoot
but is no substitute for thinking about your application workload.
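
E.g., with the same nodetool invocation you used for tpstats:

/opt/cassandra/bin/nodetool --host=localhost --port=8181 cfstats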

On Mon, Jul 26, 2010 at 12:32 PM, Anthony Molinaro
<an...@alumni.caltech.edu> wrote:
> It's usually I/O which causes backup in MESSAGE-DESERIALIZER-POOL.  You
> should check iostat and see what it looks like.  It may be that you
> need more nodes in order to deal with the read/write rate.   You can also
> use JMX to get latency values on reads and writes and see if the backup
> has a corresponding increase in latency.  You may be able to get more
> out of your hardware and memory with row caching but that really depends
> on your data set.
>
> -Anthony
>
> On Mon, Jul 26, 2010 at 12:22:46PM -0700, Dathan Pattishall wrote:
>> I have 4 nodes on enterprise type hardware (Lots of Ram 12GB, 16 i7 cores,
>> RAID Disks).
>>
>> ~# /opt/cassandra/bin/nodetool --host=localhost --port=8181 tpstats
>> Pool Name                    Active   Pending      Completed
>> STREAM-STAGE                      0         0              0
>> RESPONSE-STAGE                    0         0         516280
>> ROW-READ-STAGE                    8      4096        1164326
>> LB-OPERATIONS                     0         0              0
>> *MESSAGE-DESERIALIZER-POOL         1    682008        1818682*
>> GMFD                              0         0           6467
>> LB-TARGET                         0         0              0
>> CONSISTENCY-MANAGER               0         0         661477
>> ROW-MUTATION-STAGE                0         0         998780
>> MESSAGE-STREAMING-POOL            0         0              0
>> LOAD-BALANCER-STAGE               0         0              0
>> FLUSH-SORTER-POOL                 0         0              0
>> MEMTABLE-POST-FLUSHER             0         0              4
>> FLUSH-WRITER-POOL                 0         0              4
>> AE-SERVICE-STAGE                  0         0              0
>> HINTED-HANDOFF-POOL               0         0              3
>>
>> EQX root@cass04:~# vmstat -n 1
>>
>> procs -----------memory---------- ---swap-- -----io---- --system--
>> -----cpu------
>>  r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id
>> wa st
>>  6 10   7096 121816  16244 10375492    0    0     1     3    0    0  5  1
>> 94  0  0
>>  2 10   7096 116484  16248 10381144    0    0  5636     4 21210 9820  2  1
>> 79 18  0
>>  1  9   7096 108920  16248 10387592    0    0  6216     0 21439 9878  2  1
>> 81 16  0
>>  0  9   7096 129108  16248 10364852    0    0  6024     0 23280 8753  2  1
>> 80 17  0
>>  2  9   7096 122460  16248 10370908    0    0  6072     0 20835 9461  2  1
>> 83 14  0
>>  2  8   7096 115740  16260 10375752    0    0  5168   292 21049 9511  3  1
>> 77 20  0
>>  1 10   7096 108424  16260 10382300    0    0  6244     0 21483 8981  2  1
>> 75 22  0
>>  3  8   7096 125028  16260 10364104    0    0  5584     0 21238 9436  2  1
>> 81 16  0
>>  3  9   7096 117928  16260 10370064    0    0  5988     0 21505 10225  2  1
>> 77 19  0
>>  1  8   7096 109544  16260 10376640    0    0  6340    28 20840 8602  2  1
>> 80 18  0
>>  0  9   7096 127028  16240 10357652    0    0  5984     0 20853 9158  2  1
>> 79 18  0
>>  9  0   7096 121472  16240 10363492    0    0  5716     0 20520 8489  1  1
>> 82 16  0
>>  3  9   7096 112668  16240 10369872    0    0  6404     0 21314 9459  2  1
>> 84 13  0
>>  1  9   7096 127300  16236 10353440    0    0  5684     0 38914 10068  2  1
>> 76 21  0
>>
>>
>> *But the 16 cores are hardly utilized. Which indicates to me there is some
>> bad thread thrashing, but why? *
>>
>>
>>
>>   1  [|||||                                               8.3%]     Tasks:
>> 1070 total, 1 running
>>   2  [                                                    0.0%]     Load
>> average: 8.34 9.05 8.82
>>   3  [                                                    0.0%]     Uptime:
>> 192 days(!), 15:29:52
>>   4  [|||||||||||                                        17.9%]
>>   5  [|||||                                               5.7%]
>>   6  [||                                                  1.3%]
>>   7  [||                                                  2.6%]
>>   8  [|                                                   0.6%]
>>   9  [|                                                   0.6%]
>>   10 [||                                                  1.9%]
>>   11 [||                                                  1.9%]
>>   12 [||                                                  1.9%]
>>   13 [||                                                  1.3%]
>>   14 [|                                                   0.6%]
>>   15 [||                                                  1.3%]
>>   16 [|                                                   0.6%]
>>   Mem[||||||||||||||||||||||||||||||||||||||||||||1791/12028MB]
>>   Swp[|                                               6/1983MB]
>>
>>   PID USER     PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
>> 30269 root      40   0 14100  2116   900 R  4.0  0.0  0:00.49 htop
>> 24878 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:23.03 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24879 root      40   0 20.6G 8345M 6883M D  3.0 69.4  1:22.93 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24874 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.73 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24880 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:22.93 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24875 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.17 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24658 root      40   0 20.6G 8345M 6883M D  2.0 69.4  1:23.06 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24877 root      40   0 20.6G 8345M 6883M S  2.0 69.4  1:23.43 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24873 root      40   0 20.6G 8345M 6883M D  1.0 69.4  1:23.65 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24876 root      40   0 20.6G 8345M 6883M S  1.0 69.4  1:23.62 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24942 root      40   0 20.6G 8345M 6883M S  1.0 69.4  0:23.50 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24943 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:29.53 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24933 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:22.57 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 24939 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:12.73 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>> 25280 root      40   0 20.6G 8345M 6883M S  0.0 69.4  0:00.10 /opt/java/bin/java -ea -Xms1G -Xmx7G -XX:+UseParNewGC -XX:+UseConcMark
>
> --
> ------------------------------------------------------------------------
> Anthony Molinaro                           <an...@alumni.caltech.edu>
>



-- 
Jonathan Ellis
Project Chair, Apache Cassandra
co-founder of Riptano, the source for professional Cassandra support
http://riptano.com

Re: what causes MESSAGE-DESERIALIZER-POOL to spike

Posted by Anthony Molinaro <an...@alumni.caltech.edu>.
It's usually I/O that causes a backup in MESSAGE-DESERIALIZER-POOL.  Check
iostat and see whether the disks are saturated.  It may be that you need
more nodes to keep up with the read/write rate.  You can also use JMX to
get read and write latency values and see whether the backup comes with a
corresponding increase in latency.  Row caching may get more out of your
hardware and memory, but that really depends on your data set.
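
Something like this is what I'd check first (a rough sketch, not exact
commands; adjust the host/port and flags to your install, and note that
nodetool's per-column-family stats subcommand and its output can differ
between Cassandra versions):

  # Extended per-device stats every 5 seconds; sustained high %util and
  # long await values mean the disks, not the CPUs, are the bottleneck.
  iostat -x 5

  # Per-column-family read/write latency (served over JMX).
  /opt/cassandra/bin/nodetool --host=localhost --port=8181 cfstats

If read latency climbs along with the MESSAGE-DESERIALIZER-POOL backlog,
the disks can't keep up with the read rate, and you need faster storage,
row caching, or more nodes.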

-Anthony

-- 
------------------------------------------------------------------------
Anthony Molinaro                           <an...@alumni.caltech.edu>