You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafodion.apache.org by "Alice Chen (JIRA)" <ji...@apache.org> on 2015/07/22 20:17:47 UTC

[jira] [Created] (TRAFODION-676) LP Bug: 1378089 - Drop table hangs for more than 8 hours

Alice Chen created TRAFODION-676:
------------------------------------

             Summary: LP Bug: 1378089 - Drop table hangs for more than 8 hours
                 Key: TRAFODION-676
                 URL: https://issues.apache.org/jira/browse/TRAFODION-676
             Project: Apache Trafodion
          Issue Type: Bug
          Components: sql-exe
            Reporter: Weishiun Tsai
            Assignee: Apache Trafodion
            Priority: Blocker
             Fix For: 0.9 (pre-incubation)


During the QA regression test run, SQL ran into various hang situations. In this particular case, a drop table statement hung for more than 8 hours. The problem is not necessarily limited to drop table, but this is the one instance that we have had a chance to debug further and gather more information on so far. Here is the initial analysis of the hung mxosrvr. It may be related to accessing the _MD_ tables. This bug report was created to track the problem. More information will be added once further analysis is done.

 (gdb) p cli_globals->defaultContext_->statementList_->lastReturned_->entry_
$29 = (void *) 0x7fffc2e33a28

# Cast this to a CliStatement and look at the scheduler’s subtask trace. 
(gdb) p ((CliStatement *) 0x7fffc2e33a28)->statementGlobals_->sch_->subtaskTrace_
$30 = {{
    lastCalledTask_ = 0x7fffc2e2d2c8, 
    lastWorkRetcode_ = 0, 
    rmsTimeConsumed_ = 4
  }, {
    lastCalledTask_ = 0x7fffc2e2c2c0, 
    lastWorkRetcode_ = 666, 
    rmsTimeConsumed_ = -1
  }, {
    lastCalledTask_ = 0x0, 
    lastWorkRetcode_ = 777, 
    rmsTimeConsumed_ = -1
  }, {
    lastCalledTask_ = 0x0, 
    lastWorkRetcode_ = 777, 
    rmsTimeConsumed_ = -1
  }, {
    lastCalledTask_ = 0x0, 
    lastWorkRetcode_ = 777, 
    rmsTimeConsumed_ = -1
  }, {
    lastCalledTask_ = 0x0, 
    lastWorkRetcode_ = 777, 
---Type <return> to continue, or q <return> to quit--- q

# The 777 value for lastWorkRetcode_ indicates that the trace entry has never been used. The 666 value indicates that the subtask has been called but has not yet returned. So this statement is just starting to execute.

(gdb) p ((CliStatement *) 0x7fffc2e33a28)->source_str
$32 = 0x7fffc2e34560 "select column_name, column_number, column_class, fs_data_type, column_size, column_precision, column_scale, datetime_start_field, datetime_end_field, trim(is_upshifted), column_flags, nullable, trim(character_set), default_class, default_value, trim(column_heading), hbase_col_family, hbase_col_qualifier, direction, is_optional  from TRAFODION.\"_MD_\".COLUMNS where object_uid = 92933818424918124 and direction in ('  ')order by 2 for read committed access"

(gdb) p *((ExHbaseAccessSelectTcb *)((CliStatement *) 0x7fffc2e33a28)->statementGlobals_->sch_->subtaskTrace_[1].lastCalledTask_->tcb_)
$14 = {
  <ExHbaseAccessTcb> = {
    <ex_tcb> = {
      <ExGod> = {
        <NABasicObject> = {
          _vptr.NABasicObject = 0x7ffff37cba90, 
          h_ = 0x7fffc2e05070
        }, <No data fields>}, 
      members of ex_tcb: 
      objectId = 0, 
      eyeCatcher_ = {
        name_for_sun_compiler = "HBSA"
      }, 
      nodeType_ = ComTdb::ex_HBASE_ACCESS, 
      version_ = 1, 
      globals_ = 0x7fffc2e286f0, 
      statsEntry_ = 0x103316b0, 
      tdb = @0x7fffc2e38360, 
      pool_ = 0x0, 
      holdable_ = 0
    }, 
    members of ExHbaseAccessTcb: 
    qparent_ = {
---Type <return> to continue, or q <return> to quit---
      down = 0x7fffc2e2aca8, 
      up = 0x7fffc2e2ae10
    }, 
    matches_ = 0, 
    workAtp_ = 0x7fffc2e2b030, 
    pool_ = 0x7fffc2e2ab48, 
    ehi_ = 0x7fffc2e3c9d8, 
    hnl_ = {
      <NACollection<HbaseStr>> = {
        <NABasicObject> = {
          _vptr.NABasicObject = 0x7ffff37cb790, 
          h_ = 0x7ffff72df150
        }, 
        members of NACollection<HbaseStr>: 
        maxLength_ = 0, 
        usedLength_ = 0, 
        entries_ = 0, 
        arr_ = 0x0, 
        usages_ = 0x0, 
        heap_ = 0x0
      }, 
      members of NAList<HbaseStr>: 
      first_ = 111111111, 
---Type <return> to continue, or q <return> to quit---
      last_ = 111111111, 
      userIndexCache_ = 111111111, 
      arrayIndexCache_ = 111111111
    }, 
    table_ = {
      val = 0x7fffc2e38300 "TRAFODION._MD_.COLUMNS", 
      len = 22
    }, 
    rowId_ = {
      val = 0x7ffff3ae4cf0 "\016\u0585\363\377\177", 
      len = -45226
    }, 
    colFamName_ = {
      val = 0x0, 
      len = 0
    }, 
    colName_ = {
      val = 0x0, 
      len = 0
    }, 
    colVal_ = {
      val = 0x0, 
      len = -1025486736
---Type <return> to continue, or q <return> to quit---
    }, 
    colTS_ = 584115552256, 
    beginRowId_ = "\201J*\320\302\u0580l", '\000' <repeats 255 times>, 
    endRowId_ = "\201J*\320\302\u0580l", '\364\217\277\277' <repeats 64 times>, 
    rowIds_ = std::vector of length 0, capacity 0, 
    columns_ = std::vector of length 21, capacity 32 = {"#1:\001", "#1:\002", 
    "#1:\003", "#1:\004", "#1:\005", "#1:\006", "#1:\a", "#1:\b", "#1:\t", 
    "#1:\n", "#1:\v", "#1:\f", "#1:\r", "#1:\016", "#1:\017", "#1:\020", 
    "#1:\021", "#1:\022", "#1:\023", "#1:\024", "#1:\025"}, 
    deletedColumns_ = std::vector of length 0, capacity 0, 
    hbaseFilterColumns_ = std::vector of length 0, capacity 0, 
    hbaseFilterOps_ = std::vector of length 0, capacity 0, 
    hbaseFilterValues_ = std::vector of length 0, capacity 0, 
    asciiRow_ = 0x7fffc2e3cef8 "", 
    convertRow_ = 0x7fffc2e3d5c0 "", 
    updateRow_ = 0x0, 
    mergeInsertRow_ = 0x0, 
    rowIdRow_ = 0x0, 
    rowIdAsciiRow_ = 0x7ffff72df150 "\024\210\r\367\377\177", 
    beginRowIdRow_ = 0x0, 
    endRowIdRow_ = 0x0, 
    asciiRowMissingCols_ = 0x7fffc2e2a158 "", 
    latestTimestampForCols_ = 0x7fffc2e2a178, 
---Type <return> to continue, or q <return> to quit---
    beginKeyRow_ = 0x7fffc2e2a228 "", 
    endKeyRow_ = 0x7fffc2e3e350 "", 
    encodedKeyRow_ = 0x7fffc2e3e460 "", 
    keyColValRow_ = 0x0, 
    hbaseFilterValRow_ = 0x0, 
    rowwiseRow_ = 0x7fffc2e3dc88 "", 
    rowwiseRowLen_ = 0, 
    prevRowId_ = {
      val = 0x0, 
      len = 0
    }, 
    prevRowIdMaxLen_ = 0, 
    isEOD_ = 0, 
    hsr_ = 0x7fffc2e2ad10, 
    hgr_ = 0x3, 
    keySubsetExeExpr_ = 0x7fffc2e2b338, 
    keyMdamExeExpr_ = 0x0, 
    currRowidIdx_ = 0, 
    rowID_ = {
      val = 0x0, 
      len = 0
    }, 
    rowIDAllocatedLen_ = 0, 
---Type <return> to continue, or q <return> to quit---
    rowIDAllocatedVal_ = 0x0, 
    directRowIDBuffer_ = 0x0, 
    directRowIDBufferLen_ = 0, 
    directBufferRowNum_ = 0, 
    dbRowID_ = {
      val = 0x0, 
      len = 0
    }, 
    rowIDs_ = {
      val = 0x0, 
      len = 0
    }, 
    directRowBuffer_ = 0x0, 
    directRowBufferLen_ = 0, 
    directBufferMaxRows_ = 0, 
    row_ = {
      val = 0x0, 
      len = 0
    }, 
    rows_ = {
      val = 0x0, 
      len = 0
    }
---Type <return> to continue, or q <return> to quit---
  }, 
  members of ExHbaseAccessSelectTcb: 
  step_ = ExHbaseAccessSelectTcb::PROCESS_SCAN_KEY,    <== note: a watchpoint set here does not get hit.
  samplePercentage_ = -1, 
  scanTaskTcb_ = 0x0, 
  scanRowwiseTaskTcb_ = 0x0, 
  getTaskTcb_ = 0x0, 
  getRowwiseTaskTcb_ = 0x0, 
  scanSQTaskTcb_ = 0x7fffc2e2a338, 
  getSQTaskTcb_ = 0x0, 
  scanTask_ = 0x7fffc2e2a338, 
  getTask_ = 0x0
}

(gdb) p *((ExHbaseScanSQTaskTcb *)((ExHbaseAccessSelectTcb *)((CliStatement *) 0x7fffc2e33a28)->statementGlobals_->sch_->subtaskTrace_[1].lastCalledTask_->tcb_)->scanTask_)
$20 = {
  <ExHbaseTaskTcb> = {
    <ExGod> = {
      <NABasicObject> = {
        _vptr.NABasicObject = 0x7ffff37cbc30, 
        h_ = 0x7fffc2e33ae8
      }, <No data fields>}, 
    members of ExHbaseTaskTcb: 
    tcb_ = 0x7fffc2e2a7d8
  }, 
  members of ExHbaseScanSQTaskTcb: 
  step_ = ExHbaseScanSQTaskTcb::SCAN_FETCH_NEXT_ROW   <== note: a watchpoint set here does not get hit either.
}
Assigned to LaunchPad User Mike Hanlon



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)