You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafficserver.apache.org by "xiongzongtao (JIRA)" <ji...@apache.org> on 2013/11/20 08:43:24 UTC
[jira] [Created] (TS-2373) the cache is not initialized properly
xiongzongtao created TS-2373:
--------------------------------
Summary: the cache is not initialized properly
Key: TS-2373
URL: https://issues.apache.org/jira/browse/TS-2373
Project: Traffic Server
Issue Type: Bug
Components: Cache
Reporter: xiongzongtao
in recent days, there are several ATS servers on line failed to initialize cache system when restart, so, all requests are simply forwarded to origins.
Note: the storage is configured to use raw devices, like this:
/dev/raw/raw1 146163105792
/dev/raw/raw2 146163105792
/dev/raw/raw3 146163105792
/dev/raw/raw4 146163105792
/dev/raw/raw5 146163105792
volume.config is not configured. so, one vol per raw device
1. checking the traffic.out, found that: "cache enabled" line that
should follow the "traffic server running" line
2. echo "show:cache-stats"|traffic_shell shows that all stats are ZEROs
3. dump the memory of traffic_server by "gdb gcore" for convenience of debuging
4. use gdb to debug:
a) all vols are set in global gvol except /dev/raw/raw4.
b) one vol (/dev/raw/raw4) hunged in function handle_recover_from_data
c) check aio_req, found that, one io event are queued in
aio_req[4]'s aio_temp_list
d) check io threads, all are in pthread_cond_wait status.
here, we can see that, handle_recover_from_data issued an io request
and queued it in aio_req[4]'s aio_temp_list, but all io threads are
in pthread_cond_wait status, no one to process this request, so that,
handle_recover_from_data waiting endless. And cache will not enabled
until all the vols are inited successfully.
inspecting the code, found that,
code snippet1 from aio_thread_main
1 if (!INK_ATOMICLIST_EMPTY(my_aio_req->aio_temp_list))
2 aio_move(my_aio_req);
3 if (!(op = my_aio_req->aio_todo.pop()) && !(op = my_aio_req->http_aio_todo.pop()))
4 break;
code snippet2 from aio_queue_req
1. if (!ink_mutex_try_acquire(&req->aio_mutex)) {
ink_atomiclist_push(&req->aio_temp_list, op);
2. } else {
/* check if any pending requests on the atomic list */
if (!INK_ATOMICLIST_EMPTY(req->aio_temp_list))
aio_move(req);
/* now put the new request */
aio_insert(op, req);
ink_cond_signal(&req->aio_cond);
ink_mutex_release(&req->aio_mutex);
}
now suppose aio_temp_list is empty, thread A running code snippet1, thread B
running code snippet2, and just when thread A is between 1 and 2 in snippet1,
thread B is executing 1 in snippet2. here, thread B insert io request to
aio_temp_list, but no notification, and thread A continue to 3, and then 4(break),
finally fall in pthread_cond_wait. thus,no io thread will process this request
something about my debuging
(gdb) p theCache
$1 = (Cache *) 0xd828cb0
(gdb) p *theCache
$2 = {cache_read_done = 1, total_good_nvol = 4, total_nvol = 5, ready = 0, cache_size = 89169900,
hosttable = 0x0, total_initialized_vol = 4, scheme = 1}
(gdb) set $i=0
(gdb) p gvol[$i++]->path
$3 = 0xd826ca0 "/dev/raw/raw2"
(gdb) p gvol[$i++]->path
$4 = 0xd8267a0 "/dev/raw/raw1"
(gdb) p gvol[$i++]->path
$5 = 0xd826a00 "/dev/raw/raw5"
(gdb) p gvol[$i++]->path
$6 = 0xd8269c0 "/dev/raw/raw3"
(gdb) p gvol[$i++]->path
Cannot access memory at address 0x0
(gdb) p cp_list->head->vols[0]
$7 = (Vol *) 0xd828d10
(gdb) set $i=0
(gdb) p *cp_list->head->vols[4]
$8 = {<Continuation> = {<force_VFPT_to_top> = {_vptr.force_VFPT_to_top = 0x713010},
handler = 0x672070 <Vol::aggWrite(int, void*)>, mutex = {m_ptr = 0xd806ac0},
link = {<SLink<Continuation>> = {next = 0x0}, prev = 0x0}}, path = 0xd826a00 "/dev/raw/raw5",
hash_id = 0xd825db0 "/dev/raw/raw5 40960:17833980", hash_id_md5 = {b = {16306325817679019090,
17491422742868856741}}, fd = 38, raw_dir = 0x2aaaf0efb000 "\r025", dir = 0x2aaaf0efd000,
header = 0x2aaaf0efb000, footer = 0x2aaafbcb9000, segments = 278, buckets = 16382,
recover_pos = 68007976448, prev_recover_pos = 67994133504, scan_pos = 76968569856, skip = 40960,
start = 364421120, len = 146095964160, data_blocks = 17789500, hit_evacuate_window = 0,
io = {<AIOCallback> = {<Continuation> = {<force_VFPT_to_top> = {_vptr.force_VFPT_to_top = 0x712d70},
handler = 0x647640 <AIOCallbackInternal::io_complete(int, void*)>, mutex = {m_ptr = 0xd806ac0},
link = {<SLink<Continuation>> = {next = 0x0}, prev = 0x0}}, aiocb = {aio_fildes = 0,
aio_buf = 0x2aaabc47e000, aio_nbytes = 8388608, aio_offset = 67994133504, aio_reqprio = 0,
aio_lio_opcode = 1, aio_state = 0, aio__pad = {0}}, action = {_vptr.Action = 0x0,
continuation = 0xd8bc010, mutex = {m_ptr = 0xd806ac0}, cancelled = 0}, thread = 0x0, then = 0x0,
aio_result = 8388608}, first = 0x0, aio_req = 0xc6532f0, sleep_time = 0},
agg = {<DLL<CacheVC,Continuation::Link_link>> = {head = 0x0}, tail = 0x0},
stat_cache_vcs = {<DLL<CacheVC,Continuation::Link_link>> = {head = 0x0}, tail = 0x0},
sync = {<DLL<CacheVC,Continuation::Link_link>> = {head = 0x0}, tail = 0x0}, agg_buffer = 0x2aaaf07be000 "",
agg_todo_size = 0, agg_buf_pos = 0, trigger = 0x0, open_dir = {<Continuation> = {<force_VFPT_to_top> = {
_vptr.force_VFPT_to_top = 0x713050}, handler = 0x64c580 <OpenDir::signal_readers(int, Event*)>,
mutex = {m_ptr = 0xd806ac0}, link = {<SLink<Continuation>> = {next = 0x0}, prev = 0x0}},
delayed_readers = {<DLL<CacheVC,Link_CacheVC_opendir_link>> = {head = 0x0}, tail = 0x0}, bucket = {{
head = 0x0} <repeats 256 times>}}, ram_cache = 0x0, evacuate_size = 8709, evacuate = 0xd8bd290,
lookaside = {{head = 0x0} <repeats 256 times>}, doc_evacuator = 0x0, init_info = 0x0, disk = 0xc650f90,
---Type <return> to continue, or q <return> to quit---q
cache Quit
(gdb) p *cp_list->head->vols[3]
warning: can't find linker symbol for virtual table for `Vol' value
warning: found `CacheProcessor::start_internal(int)::__func__' instead
$9 = {<Continuation> = {<force_VFPT_to_top> = {_vptr.force_VFPT_to_top = 0x713010},
handler = 0x6460e0 <Vol::handle_recover_from_data(int, void*)>, mutex = {m_ptr = 0xd806b10},
link = {<SLink<Continuation>> = {next = 0x0}, prev = 0x0}}, path = 0xd8269e0 "/dev/raw/raw4",
hash_id = 0xd8266a0 "/dev/raw/raw4 40960:17833980", hash_id_md5 = {b = {9625335342759085551,
15230879294086164331}}, fd = 37, raw_dir = 0x2aaae59fc000 "\r025", dir = 0x2aaae59fe000,
header = 0x2aaae59fc000, footer = 0x2aaaf07ba000, segments = 278, buckets = 16382,
recover_pos = 89786622464, prev_recover_pos = 89786622464, scan_pos = 0, skip = 40960, start = 364421120,
len = 146095964160, data_blocks = 17789500, hit_evacuate_window = 0,
io = {<AIOCallback> = {<Continuation> = {<force_VFPT_to_top> = {_vptr.force_VFPT_to_top = 0x712d70},
handler = 0x647640 <AIOCallbackInternal::io_complete(int, void*)>, mutex = {m_ptr = 0xd806b10},
link = {<SLink<Continuation>> = {next = 0x0}, prev = 0x0}}, aiocb = {aio_fildes = 37,
aio_buf = 0x2aab02ca9000, aio_nbytes = 8388608, aio_offset = 89786622464, aio_reqprio = 0,
aio_lio_opcode = 1, aio_state = 0, aio__pad = {0}}, action = {_vptr.Action = 0x0,
continuation = 0xd87a010, mutex = {m_ptr = 0xd806b10}, cancelled = 0}, thread = 0x0, then = 0x0,
aio_result = 8388608}, first = 0x0, aio_req = 0xc6458b0, sleep_time = 0},
agg = {<DLL<CacheVC,Continuation::Link_link>> = {head = 0x0}, tail = 0x0},
stat_cache_vcs = {<DLL<CacheVC,Continuation::Link_link>> = {head = 0x0}, tail = 0x0},
sync = {<DLL<CacheVC,Continuation::Link_link>> = {head = 0x0}, tail = 0x0}, agg_buffer = 0x2aaae5382000 "",
agg_todo_size = 0, agg_buf_pos = 0, trigger = 0x0, open_dir = {<Continuation> = {<force_VFPT_to_top> = {
_vptr.force_VFPT_to_top = 0x713050}, handler = 0x64c580 <OpenDir::signal_readers(int, Event*)>,
mutex = {m_ptr = 0xd806b10}, link = {<SLink<Continuation>> = {next = 0x0}, prev = 0x0}},
delayed_readers = {<DLL<CacheVC,Link_CacheVC_opendir_link>> = {head = 0x0}, tail = 0x0}, bucket = {{
head = 0x0} <repeats 256 times>}}, ram_cache = 0x0, evacuate_size = 8709, evacuate = 0xd8a2aa0,
lookaside = {{head = 0x0} <repeats 256 times>}, doc_evacuator = 0x0, init_info = 0xd83b860,
---Type <return> to continue, or q <return> to quit---q
disk = 0xc643550, Quit
(gdb) p aio_reqs
$10 = {0x0, 0xc5d75f0, 0xc62f4b0, 0xc63a670, 0xc6458b0, 0xc6532f0, 0x0 <repeats 94 times>}
(gdb) p aio_reqs[5]
$11 = (AIO_Reqs *) 0xc6532f0
(gdb) p *aio_reqs[5]
$12 = {aio_todo = {<DLL<AIOCallback, Continuation::Link_link>> = {head = 0x0}, tail = 0x0},
http_aio_todo = {<DLL<AIOCallback, Continuation::Link_link>> = {head = 0x0}, tail = 0x0}, aio_temp_list = {
head = {data = 0}, name = 0x717564 "temp_list", offset = 32}, aio_mutex = {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 8, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
__size = '\0' <repeats 12 times>, "\b", '\0' <repeats 26 times>, __align = 0}, aio_cond = {__data = {
__lock = 0, __futex = 58, __total_seq = 33, __wakeup_seq = 25, __woken_seq = 25, __mutex = 0xc653328,
__nwaiters = 16, __broadcast_seq = 0},
__size = "\000\000\000\000:\000\000\000!\000\000\000\000\000\000\000\031\000\000\000\000\000\000\000\031\000\000\000\000\000\000\000(3e\f\000\000\000\000\020\000\000\000\000\000\000", __align = 249108103168},
index = 5, pending = 0, queued = 0, filedes = 38, requests_queued = 0}
(gdb) p *aio_reqs[4]
$13 = {aio_todo = {<DLL<AIOCallback, Continuation::Link_link>> = {head = 0x0}, tail = 0x0},
http_aio_todo = {<DLL<AIOCallback, Continuation::Link_link>> = {head = 0x0}, tail = 0x0}, aio_temp_list = {
head = {data = 226992344}, name = 0x717564 "temp_list", offset = 32}, aio_mutex = {__data = {__lock = 0,
__count = 0, __owner = 0, __nusers = 8, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
__size = '\0' <repeats 12 times>, "\b", '\0' <repeats 26 times>, __align = 0}, aio_cond = {__data = {
__lock = 0, __futex = 50, __total_seq = 29, __wakeup_seq = 21, __woken_seq = 21, __mutex = 0xc6458e8,
__nwaiters = 16, __broadcast_seq = 0},
__size = "\000\000\000\0002\000\000\000\035\000\000\000\000\000\000\000\025\000\000\000\000\000\000\000\025\000\000\000\000\000\000\000f\000\000\000\000\020\000\000\000\000\000\000", __align = 214748364800},
index = 4, pending = 0, queued = 0, filedes = 37, requests_queued = 1}
(gdb) p/x *aio_reqs[4]
$14 = {aio_todo = {<DLL<AIOCallback, Continuation::Link_link>> = {head = 0x0}, tail = 0x0},
http_aio_todo = {<DLL<AIOCallback, Continuation::Link_link>> = {head = 0x0}, tail = 0x0}, aio_temp_list = {
head = {data = 0xd87a0d8}, name = 0x717564, offset = 0x20}, aio_mutex = {__data = {__lock = 0x0,
__count = 0x0, __owner = 0x0, __nusers = 0x8, __kind = 0x0, __spins = 0x0, __list = {__prev = 0x0,
__next = 0x0}}, __size = {0x0 <repeats 12 times>, 0x8, 0x0 <repeats 27 times>}, __align = 0x0},
aio_cond = {__data = {__lock = 0x0, __futex = 0x32, __total_seq = 0x1d, __wakeup_seq = 0x15,
__woken_seq = 0x15, __mutex = 0xc6458e8, __nwaiters = 0x10, __broadcast_seq = 0x0}, __size = {0x0, 0x0,
0x0, 0x0, 0x32, 0x0, 0x0, 0x0, 0x1d, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x15, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x15, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xe8, 0x58, 0x64, 0xc, 0x0, 0x0, 0x0, 0x0, 0x10, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, __align = 0x3200000000}, index = 0x4, pending = 0x0, queued = 0x0,
filedes = 0x25, requests_queued = 0x1}
(gdb) p &cp_list->head->vols[3]->io
$15 = (AIOCallbackInternal *) 0xd87a0d8
--
This message was sent by Atlassian JIRA
(v6.1#6144)