You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafodion.apache.org by "Alice Chen (JIRA)" <ji...@apache.org> on 2015/07/22 20:17:47 UTC
[jira] [Created] (TRAFODION-676) LP Bug: 1378089 - Drop table hangs
for more than 8 hours
Alice Chen created TRAFODION-676:
------------------------------------
Summary: LP Bug: 1378089 - Drop table hangs for more than 8 hours
Key: TRAFODION-676
URL: https://issues.apache.org/jira/browse/TRAFODION-676
Project: Apache Trafodion
Issue Type: Bug
Components: sql-exe
Reporter: Weishiun Tsai
Assignee: Apache Trafodion
Priority: Blocker
Fix For: 0.9 (pre-incubation)
During the QA regression test run, SQL would run into various hang situations. In this particular case, a drop table statement hung for more than 8 hours. The problem is not necessarily limited to drop table, but this is the one occurrence that we have had a chance to debug further and gather more information on so far. Here is the initial analysis of the hung mxosrvr. The hang may be related to accessing the _MD_ tables. This bug report was created to track the problem; more information will be added once further analysis is done.
(gdb) p cli_globals->defaultContext_->statementList_->lastReturned_->entry_
$29 = (void *) 0x7fffc2e33a28
# Cast this to a CliStatement and look at the scheduler’s subtask trace.
(gdb) p ((CliStatement *) 0x7fffc2e33a28)->statementGlobals_->sch_->subtaskTrace_
$30 = {{
lastCalledTask_ = 0x7fffc2e2d2c8,
lastWorkRetcode_ = 0,
rmsTimeConsumed_ = 4
}, {
lastCalledTask_ = 0x7fffc2e2c2c0,
lastWorkRetcode_ = 666,
rmsTimeConsumed_ = -1
}, {
lastCalledTask_ = 0x0,
lastWorkRetcode_ = 777,
rmsTimeConsumed_ = -1
}, {
lastCalledTask_ = 0x0,
lastWorkRetcode_ = 777,
rmsTimeConsumed_ = -1
}, {
lastCalledTask_ = 0x0,
lastWorkRetcode_ = 777,
rmsTimeConsumed_ = -1
}, {
lastCalledTask_ = 0x0,
lastWorkRetcode_ = 777,
---Type <return> to continue, or q <return> to quit--- q
# The 777 value for lastWorkRetcode_ indicates that the trace entry has never been used. The 666 value indicates that the subtask has been called but has not yet returned. So this statement is just starting to execute.
(gdb) p ((CliStatement *) 0x7fffc2e33a28)->source_str
$32 = 0x7fffc2e34560 "select column_name, column_number, column_class, fs_data_type, column_size, column_precision, column_scale, datetime_start_field, datetime_end_field, trim(is_upshifted), column_flags, nullable, trim(character_set), default_class, default_value, trim(column_heading), hbase_col_family, hbase_col_qualifier, direction, is_optional from TRAFODION.\"_MD_\".COLUMNS where object_uid = 92933818424918124 and direction in (' ')order by 2 for read committed access"
(gdb) p *((ExHbaseAccessSelectTcb *)((CliStatement *) 0x7fffc2e33a28)->statementGlobals_->sch_->subtaskTrace_[1].lastCalledTask_->tcb_)
$14 = {
<ExHbaseAccessTcb> = {
<ex_tcb> = {
<ExGod> = {
<NABasicObject> = {
_vptr.NABasicObject = 0x7ffff37cba90,
h_ = 0x7fffc2e05070
}, <No data fields>},
members of ex_tcb:
objectId = 0,
eyeCatcher_ = {
name_for_sun_compiler = "HBSA"
},
nodeType_ = ComTdb::ex_HBASE_ACCESS,
version_ = 1,
globals_ = 0x7fffc2e286f0,
statsEntry_ = 0x103316b0,
tdb = @0x7fffc2e38360,
pool_ = 0x0,
holdable_ = 0
},
members of ExHbaseAccessTcb:
qparent_ = {
---Type <return> to continue, or q <return> to quit---
down = 0x7fffc2e2aca8,
up = 0x7fffc2e2ae10
},
matches_ = 0,
workAtp_ = 0x7fffc2e2b030,
pool_ = 0x7fffc2e2ab48,
ehi_ = 0x7fffc2e3c9d8,
hnl_ = {
<NACollection<HbaseStr>> = {
<NABasicObject> = {
_vptr.NABasicObject = 0x7ffff37cb790,
h_ = 0x7ffff72df150
},
members of NACollection<HbaseStr>:
maxLength_ = 0,
usedLength_ = 0,
entries_ = 0,
arr_ = 0x0,
usages_ = 0x0,
heap_ = 0x0
},
members of NAList<HbaseStr>:
first_ = 111111111,
---Type <return> to continue, or q <return> to quit---
last_ = 111111111,
userIndexCache_ = 111111111,
arrayIndexCache_ = 111111111
},
table_ = {
val = 0x7fffc2e38300 "TRAFODION._MD_.COLUMNS",
len = 22
},
rowId_ = {
val = 0x7ffff3ae4cf0 "\016\u0585\363\377\177",
len = -45226
},
colFamName_ = {
val = 0x0,
len = 0
},
colName_ = {
val = 0x0,
len = 0
},
colVal_ = {
val = 0x0,
len = -1025486736
---Type <return> to continue, or q <return> to quit---
},
colTS_ = 584115552256,
beginRowId_ = "\201J*\320\302\u0580l", '\000' <repeats 255 times>,
endRowId_ = "\201J*\320\302\u0580l", '\364\217\277\277' <repeats 64 times>,
rowIds_ = std::vector of length 0, capacity 0,
columns_ = std::vector of length 21, capacity 32 = {"#1:\001", "#1:\002",
"#1:\003", "#1:\004", "#1:\005", "#1:\006", "#1:\a", "#1:\b", "#1:\t",
"#1:\n", "#1:\v", "#1:\f", "#1:\r", "#1:\016", "#1:\017", "#1:\020",
"#1:\021", "#1:\022", "#1:\023", "#1:\024", "#1:\025"},
deletedColumns_ = std::vector of length 0, capacity 0,
hbaseFilterColumns_ = std::vector of length 0, capacity 0,
hbaseFilterOps_ = std::vector of length 0, capacity 0,
hbaseFilterValues_ = std::vector of length 0, capacity 0,
asciiRow_ = 0x7fffc2e3cef8 "",
convertRow_ = 0x7fffc2e3d5c0 "",
updateRow_ = 0x0,
mergeInsertRow_ = 0x0,
rowIdRow_ = 0x0,
rowIdAsciiRow_ = 0x7ffff72df150 "\024\210\r\367\377\177",
beginRowIdRow_ = 0x0,
endRowIdRow_ = 0x0,
asciiRowMissingCols_ = 0x7fffc2e2a158 "",
latestTimestampForCols_ = 0x7fffc2e2a178,
---Type <return> to continue, or q <return> to quit---
beginKeyRow_ = 0x7fffc2e2a228 "",
endKeyRow_ = 0x7fffc2e3e350 "",
encodedKeyRow_ = 0x7fffc2e3e460 "",
keyColValRow_ = 0x0,
hbaseFilterValRow_ = 0x0,
rowwiseRow_ = 0x7fffc2e3dc88 "",
rowwiseRowLen_ = 0,
prevRowId_ = {
val = 0x0,
len = 0
},
prevRowIdMaxLen_ = 0,
isEOD_ = 0,
hsr_ = 0x7fffc2e2ad10,
hgr_ = 0x3,
keySubsetExeExpr_ = 0x7fffc2e2b338,
keyMdamExeExpr_ = 0x0,
currRowidIdx_ = 0,
rowID_ = {
val = 0x0,
len = 0
},
rowIDAllocatedLen_ = 0,
---Type <return> to continue, or q <return> to quit---
rowIDAllocatedVal_ = 0x0,
directRowIDBuffer_ = 0x0,
directRowIDBufferLen_ = 0,
directBufferRowNum_ = 0,
dbRowID_ = {
val = 0x0,
len = 0
},
rowIDs_ = {
val = 0x0,
len = 0
},
directRowBuffer_ = 0x0,
directRowBufferLen_ = 0,
directBufferMaxRows_ = 0,
row_ = {
val = 0x0,
len = 0
},
rows_ = {
val = 0x0,
len = 0
}
---Type <return> to continue, or q <return> to quit---
},
members of ExHbaseAccessSelectTcb:
step_ = ExHbaseAccessSelectTcb::PROCESS_SCAN_KEY,   <== (reporter's note) a watchpoint set on this field is never hit.
samplePercentage_ = -1,
scanTaskTcb_ = 0x0,
scanRowwiseTaskTcb_ = 0x0,
getTaskTcb_ = 0x0,
getRowwiseTaskTcb_ = 0x0,
scanSQTaskTcb_ = 0x7fffc2e2a338,
getSQTaskTcb_ = 0x0,
scanTask_ = 0x7fffc2e2a338,
getTask_ = 0x0
}
(gdb) p *((ExHbaseScanSQTaskTcb *)((ExHbaseAccessSelectTcb *)((CliStatement *) 0x7fffc2e33a28)->statementGlobals_->sch_->subtaskTrace_[1].lastCalledTask_->tcb_)->scanTask_)
$20 = {
<ExHbaseTaskTcb> = {
<ExGod> = {
<NABasicObject> = {
_vptr.NABasicObject = 0x7ffff37cbc30,
h_ = 0x7fffc2e33ae8
}, <No data fields>},
members of ExHbaseTaskTcb:
tcb_ = 0x7fffc2e2a7d8
},
members of ExHbaseScanSQTaskTcb:
step_ = ExHbaseScanSQTaskTcb::SCAN_FETCH_NEXT_ROW   <== (reporter's note) a watchpoint set on this field is never hit either.
}
Assigned to LaunchPad User Mike Hanlon
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)