You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafficserver.apache.org by "Zhao Yongming (JIRA)" <ji...@apache.org> on 2012/10/22 15:16:12 UTC
[jira] [Created] (TS-1545) possible crash in records stat snap
Zhao Yongming created TS-1545:
---------------------------------
Summary: possible crash in records stat snap
Key: TS-1545
URL: https://issues.apache.org/jira/browse/TS-1545
Project: Traffic Server
Issue Type: Bug
Components: Core, Stats
Affects Versions: 3.3.0
Environment: debug build, long time regression testing
Reporter: Zhao Yongming
Fix For: 3.3.2
when I running regression testing in the loop script, we come to fail to start the server. well, the codes may handle the issue when we build without --enable-debug, but that looks like we have something may break records snap. open this issue in case someone need it.
{code}
[TrafficServer] using root directory '/opt/ats'
FATAL: RecMessage.cc:426: failed assert `eh->magic == REC_MESSAGE_ELE_MAGIC`
/opt/ats/bin/traffic_server - STACK TRACE:
/opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x7ffff7baeca1]
/opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x7ffff7badbb8]
/opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860]
/opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36]
/opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b]
/opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769]
/opt/ats/bin/traffic_server[0x51d4a5]
/opt/ats/bin/traffic_server(main+0x1df)[0x51ee39]
/lib64/libc.so.6(__libc_start_main+0xed)[0x7ffff515b60d]
/opt/ats/bin/traffic_server[0x4d8f99]
Program received signal SIGABRT, Aborted.
0x00007ffff516ec15 in raise () from /lib64/libc.so.6
(gdb) bt
#0 0x00007ffff516ec15 in raise () from /lib64/libc.so.6
#1 0x00007ffff517008b in abort () from /lib64/libc.so.6
#2 0x00007ffff7baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43
#3 0x00007ffff7baebfe in ink_fatal_va(int, const char *, typedef __va_list_tag __va_list_tag *) (return_code=1,
message_format=0x7ffff7bca3e0 "%s:%d: failed assert `%s`", ap=0x7fffffffc8b8) at ink_error.cc:65
#4 0x00007ffff7baeca1 in ink_fatal (return_code=1, message_format=0x7ffff7bca3e0 "%s:%d: failed assert `%s`") at ink_error.cc:73
#5 0x00007ffff7badbb8 in _ink_assert (expression=0x76ffa0 "eh->magic == REC_MESSAGE_ELE_MAGIC", file=0x76fe40 "RecMessage.cc", line=426)
at ink_assert.cc:38
#6 0x00000000006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffffffca00, record=0x7fffffffca10) at RecMessage.cc:426
#7 0x00000000006e5e36 in RecReadStatsFile () at P_RecCore.i:569
#8 0x00000000006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecCore.cc:209
#9 0x00000000006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecProcess.cc:313
#10 0x000000000051d4a5 in initialize_process_manager () at Main.cc:413
#11 0x000000000051ee39 in main (argc=1, argv=0x7fffffffdd08) at Main.cc:1409
(gdb) f 6
#6 0x00000000006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffffffca00, record=0x7fffffffca10) at RecMessage.cc:426
426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC);
(gdb) l
421 itr->ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + itr->ele_hdr->o_next);
422 itr->next += 1;
423 eh = itr->ele_hdr;
424 }
425
426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC);
427
428 // If the file is corrupt, ignore the the rest of the file.
429 if (eh->magic != REC_MESSAGE_ELE_MAGIC) {
430 Warning("Persistent statistics file records.stat is corrupted. Ignoring the rest of the file\n");
(gdb) p eh->magic
$1 = 0
(gdb) p REC_MESSAGE_ELE_MAGIC
$2 = 4027445261
(gdb) p eh
$3 = (RecMessageEleHdr *) 0xff4138
(gdb) p *eh
$4 = {magic = 0, o_next = 0}
(gdb) f 7
#7 0x00000000006e5e36 in RecReadStatsFile () at P_RecCore.i:569
569 } while (RecMessageUnmarshalNext(m, &itr, &r) != REC_ERR_FAIL);
(gdb) l
564 if (RecMessageUnmarshalFirst(m, &itr, &r) != REC_ERR_FAIL) {
565 do {
566 if ((r->name == NULL) || (!strlen(r->name)))
567 continue;
568 RecSetRecord(r->rec_type, r->name, r->data_type, &(r->data), &(r->stat_meta.data_raw), false);
569 } while (RecMessageUnmarshalNext(m, &itr, &r) != REC_ERR_FAIL);
570 }
571 }
572
573 ink_rwlock_unlock(&g_records_rwlock);
(gdb) p r
$5 = (RecRecord *) 0xff4070
(gdb) p *r
$6 = {rec_type = RECT_PROCESS, name = 0xff4118 "", data_type = RECD_INT, data = {rec_int = 0, rec_float = 0, rec_string = 0x0,
rec_counter = 0}, data_default = {rec_int = 0, rec_float = 0, rec_string = 0x0, rec_counter = 0}, lock = {nthread_holding = 1,
thread_holding = 140598138681088, the_mutex = {__data = {__lock = 1, __count = 0, __owner = 19059, __nusers = 1, __kind = 0,
__spins = 0, __list = {__prev = 0x0, __next = 0x0}},
__size = "\001\000\000\000\000\000\000\000sJ\000\000\001", '\000' <repeats 26 times>, __align = 1}}, sync_required = 1 '\001',
registered = true, {stat_meta = {data_raw = {sum = 0, count = 0, last_sum = 0, last_count = 0},
sync_cb = 0x6e8059 <RecRawStatSyncHrTimeAvg(char const*, RecDataT, RecData*, RecRawStatBlock*, int)>, sync_rsb = 0x107f640,
sync_id = 5, persist_type = RECP_NULL}, config_meta = {update_required = 0 '\000', update_cb_list = 0x0, update_cookie = 0x0,
update_type = RECU_NULL, check_type = RECC_NULL,
check_expr = 0x6e8059 <RecRawStatSyncHrTimeAvg(char const*, RecDataT, RecData*, RecRawStatBlock*, int)> "UH\211\345H\203\354`H\211}u\304H\211U\270H\211M\260D\211E\300H\213\005\275>0", access_type = (unknown: 17299008)}}, order = 0, rsb_id = 0}
(gdb) p *itr
No symbol "operator*" in current context.
(gdb) p m
$7 = (RecMessage *) 0xfe6110
(gdb) p *m
$8 = {msg_type = RECG_NULL, o_start = 24, o_write = 59760, o_end = 59760, entries = 265, alignment = 0}
(gdb)
{code}
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira
[jira] [Updated] (TS-1545) possible crash in records stat snap
Posted by "Zhao Yongming (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/TS-1545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zhao Yongming updated TS-1545:
------------------------------
Attachment: stats.snap
records.snap
the snap file which is crashing the server
> possible crash in records stat snap
> -----------------------------------
>
> Key: TS-1545
> URL: https://issues.apache.org/jira/browse/TS-1545
> Project: Traffic Server
> Issue Type: Bug
> Components: Core, Stats
> Affects Versions: 3.3.0
> Environment: debug build, long time regression testing
> Reporter: Zhao Yongming
> Fix For: 3.3.2
>
> Attachments: records.snap, stats.snap
>
>
> when I running regression testing in the loop script, we come to fail to start the server. well, the codes may handle the issue when we build without --enable-debug, but that looks like we have something may break records snap. open this issue in case someone need it.
> {code}
> [TrafficServer] using root directory '/opt/ats'
> FATAL: RecMessage.cc:426: failed assert `eh->magic == REC_MESSAGE_ELE_MAGIC`
> /opt/ats/bin/traffic_server - STACK TRACE:
> /opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x7ffff7baeca1]
> /opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x7ffff7badbb8]
> /opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860]
> /opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36]
> /opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b]
> /opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769]
> /opt/ats/bin/traffic_server[0x51d4a5]
> /opt/ats/bin/traffic_server(main+0x1df)[0x51ee39]
> /lib64/libc.so.6(__libc_start_main+0xed)[0x7ffff515b60d]
> /opt/ats/bin/traffic_server[0x4d8f99]
> Program received signal SIGABRT, Aborted.
> 0x00007ffff516ec15 in raise () from /lib64/libc.so.6
> (gdb) bt
> #0 0x00007ffff516ec15 in raise () from /lib64/libc.so.6
> #1 0x00007ffff517008b in abort () from /lib64/libc.so.6
> #2 0x00007ffff7baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43
> #3 0x00007ffff7baebfe in ink_fatal_va(int, const char *, typedef __va_list_tag __va_list_tag *) (return_code=1,
> message_format=0x7ffff7bca3e0 "%s:%d: failed assert `%s`", ap=0x7fffffffc8b8) at ink_error.cc:65
> #4 0x00007ffff7baeca1 in ink_fatal (return_code=1, message_format=0x7ffff7bca3e0 "%s:%d: failed assert `%s`") at ink_error.cc:73
> #5 0x00007ffff7badbb8 in _ink_assert (expression=0x76ffa0 "eh->magic == REC_MESSAGE_ELE_MAGIC", file=0x76fe40 "RecMessage.cc", line=426)
> at ink_assert.cc:38
> #6 0x00000000006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffffffca00, record=0x7fffffffca10) at RecMessage.cc:426
> #7 0x00000000006e5e36 in RecReadStatsFile () at P_RecCore.i:569
> #8 0x00000000006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecCore.cc:209
> #9 0x00000000006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecProcess.cc:313
> #10 0x000000000051d4a5 in initialize_process_manager () at Main.cc:413
> #11 0x000000000051ee39 in main (argc=1, argv=0x7fffffffdd08) at Main.cc:1409
> (gdb) f 6
> #6 0x00000000006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffffffca00, record=0x7fffffffca10) at RecMessage.cc:426
> 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC);
> (gdb) l
> 421 itr->ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + itr->ele_hdr->o_next);
> 422 itr->next += 1;
> 423 eh = itr->ele_hdr;
> 424 }
> 425
> 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC);
> 427
> 428 // If the file is corrupt, ignore the the rest of the file.
> 429 if (eh->magic != REC_MESSAGE_ELE_MAGIC) {
> 430 Warning("Persistent statistics file records.stat is corrupted. Ignoring the rest of the file\n");
> (gdb) p eh->magic
> $1 = 0
> (gdb) p REC_MESSAGE_ELE_MAGIC
> $2 = 4027445261
> (gdb) p eh
> $3 = (RecMessageEleHdr *) 0xff4138
> (gdb) p *eh
> $4 = {magic = 0, o_next = 0}
> (gdb) f 7
> #7 0x00000000006e5e36 in RecReadStatsFile () at P_RecCore.i:569
> 569 } while (RecMessageUnmarshalNext(m, &itr, &r) != REC_ERR_FAIL);
> (gdb) l
> 564 if (RecMessageUnmarshalFirst(m, &itr, &r) != REC_ERR_FAIL) {
> 565 do {
> 566 if ((r->name == NULL) || (!strlen(r->name)))
> 567 continue;
> 568 RecSetRecord(r->rec_type, r->name, r->data_type, &(r->data), &(r->stat_meta.data_raw), false);
> 569 } while (RecMessageUnmarshalNext(m, &itr, &r) != REC_ERR_FAIL);
> 570 }
> 571 }
> 572
> 573 ink_rwlock_unlock(&g_records_rwlock);
> (gdb) p r
> $5 = (RecRecord *) 0xff4070
> (gdb) p *r
> $6 = {rec_type = RECT_PROCESS, name = 0xff4118 "", data_type = RECD_INT, data = {rec_int = 0, rec_float = 0, rec_string = 0x0,
> rec_counter = 0}, data_default = {rec_int = 0, rec_float = 0, rec_string = 0x0, rec_counter = 0}, lock = {nthread_holding = 1,
> thread_holding = 140598138681088, the_mutex = {__data = {__lock = 1, __count = 0, __owner = 19059, __nusers = 1, __kind = 0,
> __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
> __size = "\001\000\000\000\000\000\000\000sJ\000\000\001", '\000' <repeats 26 times>, __align = 1}}, sync_required = 1 '\001',
> registered = true, {stat_meta = {data_raw = {sum = 0, count = 0, last_sum = 0, last_count = 0},
> sync_cb = 0x6e8059 <RecRawStatSyncHrTimeAvg(char const*, RecDataT, RecData*, RecRawStatBlock*, int)>, sync_rsb = 0x107f640,
> sync_id = 5, persist_type = RECP_NULL}, config_meta = {update_required = 0 '\000', update_cb_list = 0x0, update_cookie = 0x0,
> update_type = RECU_NULL, check_type = RECC_NULL,
> check_expr = 0x6e8059 <RecRawStatSyncHrTimeAvg(char const*, RecDataT, RecData*, RecRawStatBlock*, int)> "UH\211\345H\203\354`H\211}u\304H\211U\270H\211M\260D\211E\300H\213\005\275>0", access_type = (unknown: 17299008)}}, order = 0, rsb_id = 0}
> (gdb) p *itr
> No symbol "operator*" in current context.
> (gdb) p m
> $7 = (RecMessage *) 0xfe6110
> (gdb) p *m
> $8 = {msg_type = RECG_NULL, o_start = 24, o_write = 59760, o_end = 59760, entries = 265, alignment = 0}
> (gdb)
> {code}
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira