You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by gi...@apache.org on 2018/03/17 14:53:38 UTC

[02/51] [partial] hbase-site git commit: Published site at 00095a2ef9442e3fd86c04876c9d91f2f8b23ad8.

http://git-wip-us.apache.org/repos/asf/hbase-site/blob/bd675fa3/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.html
----------------------------------------------------------------------
diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.html b/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.html
index 3bc66bb..97aa79c 100644
--- a/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.html
+++ b/devapidocs/src-html/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.html
@@ -1435,459 +1435,460 @@
 <span class="sourceLineNo">1427</span>   */<a name="line.1427"></a>
 <span class="sourceLineNo">1428</span>  private void execProcedure(final RootProcedureState procStack,<a name="line.1428"></a>
 <span class="sourceLineNo">1429</span>      final Procedure&lt;TEnvironment&gt; procedure) {<a name="line.1429"></a>
-<span class="sourceLineNo">1430</span>    Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE);<a name="line.1430"></a>
-<span class="sourceLineNo">1431</span><a name="line.1431"></a>
-<span class="sourceLineNo">1432</span>    // Procedures can suspend themselves. They skip out by throwing a ProcedureSuspendedException.<a name="line.1432"></a>
-<span class="sourceLineNo">1433</span>    // The exception is caught below and then we hurry to the exit without disturbing state. The<a name="line.1433"></a>
-<span class="sourceLineNo">1434</span>    // idea is that the processing of this procedure will be unsuspended later by an external event<a name="line.1434"></a>
-<span class="sourceLineNo">1435</span>    // such the report of a region open. TODO: Currently, its possible for two worker threads<a name="line.1435"></a>
-<span class="sourceLineNo">1436</span>    // to be working on the same procedure concurrently (locking in procedures is NOT about<a name="line.1436"></a>
-<span class="sourceLineNo">1437</span>    // concurrency but about tying an entity to a procedure; i.e. a region to a particular<a name="line.1437"></a>
-<span class="sourceLineNo">1438</span>    // procedure instance). This can make for issues if both threads are changing state.<a name="line.1438"></a>
-<span class="sourceLineNo">1439</span>    // See env.getProcedureScheduler().wakeEvent(regionNode.getProcedureEvent());<a name="line.1439"></a>
-<span class="sourceLineNo">1440</span>    // in RegionTransitionProcedure#reportTransition for example of Procedure putting<a name="line.1440"></a>
-<span class="sourceLineNo">1441</span>    // itself back on the scheduler making it possible for two threads running against<a name="line.1441"></a>
-<span class="sourceLineNo">1442</span>    // the one Procedure. Might be ok if they are both doing different, idempotent sections.<a name="line.1442"></a>
-<span class="sourceLineNo">1443</span>    boolean suspended = false;<a name="line.1443"></a>
-<span class="sourceLineNo">1444</span><a name="line.1444"></a>
-<span class="sourceLineNo">1445</span>    // Whether to 're-' -execute; run through the loop again.<a name="line.1445"></a>
-<span class="sourceLineNo">1446</span>    boolean reExecute = false;<a name="line.1446"></a>
-<span class="sourceLineNo">1447</span><a name="line.1447"></a>
-<span class="sourceLineNo">1448</span>    Procedure&lt;TEnvironment&gt;[] subprocs = null;<a name="line.1448"></a>
-<span class="sourceLineNo">1449</span>    do {<a name="line.1449"></a>
-<span class="sourceLineNo">1450</span>      reExecute = false;<a name="line.1450"></a>
-<span class="sourceLineNo">1451</span>      try {<a name="line.1451"></a>
-<span class="sourceLineNo">1452</span>        subprocs = procedure.doExecute(getEnvironment());<a name="line.1452"></a>
-<span class="sourceLineNo">1453</span>        if (subprocs != null &amp;&amp; subprocs.length == 0) {<a name="line.1453"></a>
-<span class="sourceLineNo">1454</span>          subprocs = null;<a name="line.1454"></a>
-<span class="sourceLineNo">1455</span>        }<a name="line.1455"></a>
-<span class="sourceLineNo">1456</span>      } catch (ProcedureSuspendedException e) {<a name="line.1456"></a>
-<span class="sourceLineNo">1457</span>        if (LOG.isTraceEnabled()) {<a name="line.1457"></a>
-<span class="sourceLineNo">1458</span>          LOG.trace("Suspend " + procedure);<a name="line.1458"></a>
-<span class="sourceLineNo">1459</span>        }<a name="line.1459"></a>
-<span class="sourceLineNo">1460</span>        suspended = true;<a name="line.1460"></a>
-<span class="sourceLineNo">1461</span>      } catch (ProcedureYieldException e) {<a name="line.1461"></a>
-<span class="sourceLineNo">1462</span>        if (LOG.isTraceEnabled()) {<a name="line.1462"></a>
-<span class="sourceLineNo">1463</span>          LOG.trace("Yield " + procedure + ": " + e.getMessage(), e);<a name="line.1463"></a>
-<span class="sourceLineNo">1464</span>        }<a name="line.1464"></a>
-<span class="sourceLineNo">1465</span>        scheduler.yield(procedure);<a name="line.1465"></a>
-<span class="sourceLineNo">1466</span>        return;<a name="line.1466"></a>
-<span class="sourceLineNo">1467</span>      } catch (InterruptedException e) {<a name="line.1467"></a>
-<span class="sourceLineNo">1468</span>        if (LOG.isTraceEnabled()) {<a name="line.1468"></a>
-<span class="sourceLineNo">1469</span>          LOG.trace("Yield interrupt " + procedure + ": " + e.getMessage(), e);<a name="line.1469"></a>
-<span class="sourceLineNo">1470</span>        }<a name="line.1470"></a>
-<span class="sourceLineNo">1471</span>        handleInterruptedException(procedure, e);<a name="line.1471"></a>
-<span class="sourceLineNo">1472</span>        scheduler.yield(procedure);<a name="line.1472"></a>
-<span class="sourceLineNo">1473</span>        return;<a name="line.1473"></a>
-<span class="sourceLineNo">1474</span>      } catch (Throwable e) {<a name="line.1474"></a>
-<span class="sourceLineNo">1475</span>        // Catch NullPointerExceptions or similar errors...<a name="line.1475"></a>
-<span class="sourceLineNo">1476</span>        String msg = "CODE-BUG: Uncaught runtime exception: " + procedure;<a name="line.1476"></a>
-<span class="sourceLineNo">1477</span>        LOG.error(msg, e);<a name="line.1477"></a>
-<span class="sourceLineNo">1478</span>        procedure.setFailure(new RemoteProcedureException(msg, e));<a name="line.1478"></a>
-<span class="sourceLineNo">1479</span>      }<a name="line.1479"></a>
-<span class="sourceLineNo">1480</span><a name="line.1480"></a>
-<span class="sourceLineNo">1481</span>      if (!procedure.isFailed()) {<a name="line.1481"></a>
-<span class="sourceLineNo">1482</span>        if (subprocs != null) {<a name="line.1482"></a>
-<span class="sourceLineNo">1483</span>          if (subprocs.length == 1 &amp;&amp; subprocs[0] == procedure) {<a name="line.1483"></a>
-<span class="sourceLineNo">1484</span>            // Procedure returned itself. Quick-shortcut for a state machine-like procedure;<a name="line.1484"></a>
-<span class="sourceLineNo">1485</span>            // i.e. we go around this loop again rather than go back out on the scheduler queue.<a name="line.1485"></a>
-<span class="sourceLineNo">1486</span>            subprocs = null;<a name="line.1486"></a>
-<span class="sourceLineNo">1487</span>            reExecute = true;<a name="line.1487"></a>
-<span class="sourceLineNo">1488</span>            if (LOG.isTraceEnabled()) {<a name="line.1488"></a>
-<span class="sourceLineNo">1489</span>              LOG.trace("Short-circuit to next step on pid=" + procedure.getProcId());<a name="line.1489"></a>
-<span class="sourceLineNo">1490</span>            }<a name="line.1490"></a>
-<span class="sourceLineNo">1491</span>          } else {<a name="line.1491"></a>
-<span class="sourceLineNo">1492</span>            // Yield the current procedure, and make the subprocedure runnable<a name="line.1492"></a>
-<span class="sourceLineNo">1493</span>            // subprocs may come back 'null'.<a name="line.1493"></a>
-<span class="sourceLineNo">1494</span>            subprocs = initializeChildren(procStack, procedure, subprocs);<a name="line.1494"></a>
-<span class="sourceLineNo">1495</span>            LOG.info("Initialized subprocedures=" +<a name="line.1495"></a>
-<span class="sourceLineNo">1496</span>              (subprocs == null? null:<a name="line.1496"></a>
-<span class="sourceLineNo">1497</span>                Stream.of(subprocs).map(e -&gt; "{" + e.toString() + "}").<a name="line.1497"></a>
-<span class="sourceLineNo">1498</span>                collect(Collectors.toList()).toString()));<a name="line.1498"></a>
-<span class="sourceLineNo">1499</span>          }<a name="line.1499"></a>
-<span class="sourceLineNo">1500</span>        } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {<a name="line.1500"></a>
-<span class="sourceLineNo">1501</span>          if (LOG.isTraceEnabled()) {<a name="line.1501"></a>
-<span class="sourceLineNo">1502</span>            LOG.trace("Added to timeoutExecutor " + procedure);<a name="line.1502"></a>
-<span class="sourceLineNo">1503</span>          }<a name="line.1503"></a>
-<span class="sourceLineNo">1504</span>          timeoutExecutor.add(procedure);<a name="line.1504"></a>
-<span class="sourceLineNo">1505</span>        } else if (!suspended) {<a name="line.1505"></a>
-<span class="sourceLineNo">1506</span>          // No subtask, so we are done<a name="line.1506"></a>
-<span class="sourceLineNo">1507</span>          procedure.setState(ProcedureState.SUCCESS);<a name="line.1507"></a>
-<span class="sourceLineNo">1508</span>        }<a name="line.1508"></a>
-<span class="sourceLineNo">1509</span>      }<a name="line.1509"></a>
-<span class="sourceLineNo">1510</span><a name="line.1510"></a>
-<span class="sourceLineNo">1511</span>      // Add the procedure to the stack<a name="line.1511"></a>
-<span class="sourceLineNo">1512</span>      procStack.addRollbackStep(procedure);<a name="line.1512"></a>
-<span class="sourceLineNo">1513</span><a name="line.1513"></a>
-<span class="sourceLineNo">1514</span>      // allows to kill the executor before something is stored to the wal.<a name="line.1514"></a>
-<span class="sourceLineNo">1515</span>      // useful to test the procedure recovery.<a name="line.1515"></a>
-<span class="sourceLineNo">1516</span>      if (testing != null &amp;&amp; testing.shouldKillBeforeStoreUpdate(suspended)) {<a name="line.1516"></a>
-<span class="sourceLineNo">1517</span>        LOG.debug("TESTING: Kill before store update: " + procedure);<a name="line.1517"></a>
-<span class="sourceLineNo">1518</span>        stop();<a name="line.1518"></a>
-<span class="sourceLineNo">1519</span>        return;<a name="line.1519"></a>
-<span class="sourceLineNo">1520</span>      }<a name="line.1520"></a>
-<span class="sourceLineNo">1521</span><a name="line.1521"></a>
-<span class="sourceLineNo">1522</span>      // TODO: The code here doesn't check if store is running before persisting to the store as<a name="line.1522"></a>
-<span class="sourceLineNo">1523</span>      // it relies on the method call below to throw RuntimeException to wind up the stack and<a name="line.1523"></a>
-<span class="sourceLineNo">1524</span>      // executor thread to stop. The statement following the method call below seems to check if<a name="line.1524"></a>
-<span class="sourceLineNo">1525</span>      // store is not running, to prevent scheduling children procedures, re-execution or yield<a name="line.1525"></a>
-<span class="sourceLineNo">1526</span>      // of this procedure. This may need more scrutiny and subsequent cleanup in future<a name="line.1526"></a>
-<span class="sourceLineNo">1527</span>      //<a name="line.1527"></a>
-<span class="sourceLineNo">1528</span>      // Commit the transaction even if a suspend (state may have changed). Note this append<a name="line.1528"></a>
-<span class="sourceLineNo">1529</span>      // can take a bunch of time to complete.<a name="line.1529"></a>
-<span class="sourceLineNo">1530</span>      updateStoreOnExec(procStack, procedure, subprocs);<a name="line.1530"></a>
-<span class="sourceLineNo">1531</span><a name="line.1531"></a>
-<span class="sourceLineNo">1532</span>      // if the store is not running we are aborting<a name="line.1532"></a>
-<span class="sourceLineNo">1533</span>      if (!store.isRunning()) return;<a name="line.1533"></a>
-<span class="sourceLineNo">1534</span>      // if the procedure is kind enough to pass the slot to someone else, yield<a name="line.1534"></a>
-<span class="sourceLineNo">1535</span>      if (procedure.isRunnable() &amp;&amp; !suspended &amp;&amp;<a name="line.1535"></a>
-<span class="sourceLineNo">1536</span>          procedure.isYieldAfterExecutionStep(getEnvironment())) {<a name="line.1536"></a>
-<span class="sourceLineNo">1537</span>        scheduler.yield(procedure);<a name="line.1537"></a>
-<span class="sourceLineNo">1538</span>        return;<a name="line.1538"></a>
-<span class="sourceLineNo">1539</span>      }<a name="line.1539"></a>
-<span class="sourceLineNo">1540</span><a name="line.1540"></a>
-<span class="sourceLineNo">1541</span>      assert (reExecute &amp;&amp; subprocs == null) || !reExecute;<a name="line.1541"></a>
-<span class="sourceLineNo">1542</span>    } while (reExecute);<a name="line.1542"></a>
-<span class="sourceLineNo">1543</span>    // Submit the new subprocedures<a name="line.1543"></a>
-<span class="sourceLineNo">1544</span>    if (subprocs != null &amp;&amp; !procedure.isFailed()) {<a name="line.1544"></a>
-<span class="sourceLineNo">1545</span>      submitChildrenProcedures(subprocs);<a name="line.1545"></a>
-<span class="sourceLineNo">1546</span>    }<a name="line.1546"></a>
-<span class="sourceLineNo">1547</span><a name="line.1547"></a>
-<span class="sourceLineNo">1548</span>    // if the procedure is complete and has a parent, count down the children latch.<a name="line.1548"></a>
-<span class="sourceLineNo">1549</span>    // If 'suspended', do nothing to change state -- let other threads handle unsuspend event.<a name="line.1549"></a>
-<span class="sourceLineNo">1550</span>    if (!suspended &amp;&amp; procedure.isFinished() &amp;&amp; procedure.hasParent()) {<a name="line.1550"></a>
-<span class="sourceLineNo">1551</span>      countDownChildren(procStack, procedure);<a name="line.1551"></a>
-<span class="sourceLineNo">1552</span>    }<a name="line.1552"></a>
-<span class="sourceLineNo">1553</span>  }<a name="line.1553"></a>
-<span class="sourceLineNo">1554</span><a name="line.1554"></a>
-<span class="sourceLineNo">1555</span>  private Procedure[] initializeChildren(final RootProcedureState procStack,<a name="line.1555"></a>
-<span class="sourceLineNo">1556</span>      final Procedure procedure, final Procedure[] subprocs) {<a name="line.1556"></a>
-<span class="sourceLineNo">1557</span>    assert subprocs != null : "expected subprocedures";<a name="line.1557"></a>
-<span class="sourceLineNo">1558</span>    final long rootProcId = getRootProcedureId(procedure);<a name="line.1558"></a>
-<span class="sourceLineNo">1559</span>    for (int i = 0; i &lt; subprocs.length; ++i) {<a name="line.1559"></a>
-<span class="sourceLineNo">1560</span>      final Procedure subproc = subprocs[i];<a name="line.1560"></a>
-<span class="sourceLineNo">1561</span>      if (subproc == null) {<a name="line.1561"></a>
-<span class="sourceLineNo">1562</span>        String msg = "subproc[" + i + "] is null, aborting the procedure";<a name="line.1562"></a>
-<span class="sourceLineNo">1563</span>        procedure.setFailure(new RemoteProcedureException(msg,<a name="line.1563"></a>
-<span class="sourceLineNo">1564</span>          new IllegalArgumentIOException(msg)));<a name="line.1564"></a>
-<span class="sourceLineNo">1565</span>        return null;<a name="line.1565"></a>
-<span class="sourceLineNo">1566</span>      }<a name="line.1566"></a>
-<span class="sourceLineNo">1567</span><a name="line.1567"></a>
-<span class="sourceLineNo">1568</span>      assert subproc.getState() == ProcedureState.INITIALIZING : subproc;<a name="line.1568"></a>
-<span class="sourceLineNo">1569</span>      subproc.setParentProcId(procedure.getProcId());<a name="line.1569"></a>
-<span class="sourceLineNo">1570</span>      subproc.setRootProcId(rootProcId);<a name="line.1570"></a>
-<span class="sourceLineNo">1571</span>      subproc.setProcId(nextProcId());<a name="line.1571"></a>
-<span class="sourceLineNo">1572</span>      procStack.addSubProcedure(subproc);<a name="line.1572"></a>
-<span class="sourceLineNo">1573</span>    }<a name="line.1573"></a>
-<span class="sourceLineNo">1574</span><a name="line.1574"></a>
-<span class="sourceLineNo">1575</span>    if (!procedure.isFailed()) {<a name="line.1575"></a>
-<span class="sourceLineNo">1576</span>      procedure.setChildrenLatch(subprocs.length);<a name="line.1576"></a>
-<span class="sourceLineNo">1577</span>      switch (procedure.getState()) {<a name="line.1577"></a>
-<span class="sourceLineNo">1578</span>        case RUNNABLE:<a name="line.1578"></a>
-<span class="sourceLineNo">1579</span>          procedure.setState(ProcedureState.WAITING);<a name="line.1579"></a>
-<span class="sourceLineNo">1580</span>          break;<a name="line.1580"></a>
-<span class="sourceLineNo">1581</span>        case WAITING_TIMEOUT:<a name="line.1581"></a>
-<span class="sourceLineNo">1582</span>          timeoutExecutor.add(procedure);<a name="line.1582"></a>
-<span class="sourceLineNo">1583</span>          break;<a name="line.1583"></a>
-<span class="sourceLineNo">1584</span>        default:<a name="line.1584"></a>
-<span class="sourceLineNo">1585</span>          break;<a name="line.1585"></a>
-<span class="sourceLineNo">1586</span>      }<a name="line.1586"></a>
-<span class="sourceLineNo">1587</span>    }<a name="line.1587"></a>
-<span class="sourceLineNo">1588</span>    return subprocs;<a name="line.1588"></a>
-<span class="sourceLineNo">1589</span>  }<a name="line.1589"></a>
-<span class="sourceLineNo">1590</span><a name="line.1590"></a>
-<span class="sourceLineNo">1591</span>  private void submitChildrenProcedures(final Procedure[] subprocs) {<a name="line.1591"></a>
-<span class="sourceLineNo">1592</span>    for (int i = 0; i &lt; subprocs.length; ++i) {<a name="line.1592"></a>
-<span class="sourceLineNo">1593</span>      final Procedure subproc = subprocs[i];<a name="line.1593"></a>
-<span class="sourceLineNo">1594</span>      subproc.updateMetricsOnSubmit(getEnvironment());<a name="line.1594"></a>
-<span class="sourceLineNo">1595</span>      assert !procedures.containsKey(subproc.getProcId());<a name="line.1595"></a>
-<span class="sourceLineNo">1596</span>      procedures.put(subproc.getProcId(), subproc);<a name="line.1596"></a>
-<span class="sourceLineNo">1597</span>      scheduler.addFront(subproc);<a name="line.1597"></a>
-<span class="sourceLineNo">1598</span>    }<a name="line.1598"></a>
-<span class="sourceLineNo">1599</span>  }<a name="line.1599"></a>
-<span class="sourceLineNo">1600</span><a name="line.1600"></a>
-<span class="sourceLineNo">1601</span>  private void countDownChildren(final RootProcedureState procStack, final Procedure procedure) {<a name="line.1601"></a>
-<span class="sourceLineNo">1602</span>    final Procedure parent = procedures.get(procedure.getParentProcId());<a name="line.1602"></a>
-<span class="sourceLineNo">1603</span>    if (parent == null) {<a name="line.1603"></a>
-<span class="sourceLineNo">1604</span>      assert procStack.isRollingback();<a name="line.1604"></a>
-<span class="sourceLineNo">1605</span>      return;<a name="line.1605"></a>
-<span class="sourceLineNo">1606</span>    }<a name="line.1606"></a>
-<span class="sourceLineNo">1607</span><a name="line.1607"></a>
-<span class="sourceLineNo">1608</span>    // If this procedure is the last child awake the parent procedure<a name="line.1608"></a>
-<span class="sourceLineNo">1609</span>    if (parent.tryRunnable()) {<a name="line.1609"></a>
-<span class="sourceLineNo">1610</span>      // If we succeeded in making the parent runnable -- i.e. all of its<a name="line.1610"></a>
-<span class="sourceLineNo">1611</span>      // children have completed, move parent to front of the queue.<a name="line.1611"></a>
-<span class="sourceLineNo">1612</span>      store.update(parent);<a name="line.1612"></a>
-<span class="sourceLineNo">1613</span>      scheduler.addFront(parent);<a name="line.1613"></a>
-<span class="sourceLineNo">1614</span>      LOG.info("Finished subprocedure(s) of " + parent + "; resume parent processing.");<a name="line.1614"></a>
-<span class="sourceLineNo">1615</span>      return;<a name="line.1615"></a>
-<span class="sourceLineNo">1616</span>    }<a name="line.1616"></a>
-<span class="sourceLineNo">1617</span>  }<a name="line.1617"></a>
-<span class="sourceLineNo">1618</span><a name="line.1618"></a>
-<span class="sourceLineNo">1619</span>  private void updateStoreOnExec(final RootProcedureState procStack,<a name="line.1619"></a>
-<span class="sourceLineNo">1620</span>      final Procedure procedure, final Procedure[] subprocs) {<a name="line.1620"></a>
-<span class="sourceLineNo">1621</span>    if (subprocs != null &amp;&amp; !procedure.isFailed()) {<a name="line.1621"></a>
-<span class="sourceLineNo">1622</span>      if (LOG.isTraceEnabled()) {<a name="line.1622"></a>
-<span class="sourceLineNo">1623</span>        LOG.trace("Stored " + procedure + ", children " + Arrays.toString(subprocs));<a name="line.1623"></a>
-<span class="sourceLineNo">1624</span>      }<a name="line.1624"></a>
-<span class="sourceLineNo">1625</span>      store.insert(procedure, subprocs);<a name="line.1625"></a>
-<span class="sourceLineNo">1626</span>    } else {<a name="line.1626"></a>
-<span class="sourceLineNo">1627</span>      if (LOG.isTraceEnabled()) {<a name="line.1627"></a>
-<span class="sourceLineNo">1628</span>        LOG.trace("Store update " + procedure);<a name="line.1628"></a>
-<span class="sourceLineNo">1629</span>      }<a name="line.1629"></a>
-<span class="sourceLineNo">1630</span>      if (procedure.isFinished() &amp;&amp; !procedure.hasParent()) {<a name="line.1630"></a>
-<span class="sourceLineNo">1631</span>        // remove child procedures<a name="line.1631"></a>
-<span class="sourceLineNo">1632</span>        final long[] childProcIds = procStack.getSubprocedureIds();<a name="line.1632"></a>
-<span class="sourceLineNo">1633</span>        if (childProcIds != null) {<a name="line.1633"></a>
-<span class="sourceLineNo">1634</span>          store.delete(procedure, childProcIds);<a name="line.1634"></a>
-<span class="sourceLineNo">1635</span>          for (int i = 0; i &lt; childProcIds.length; ++i) {<a name="line.1635"></a>
-<span class="sourceLineNo">1636</span>            procedures.remove(childProcIds[i]);<a name="line.1636"></a>
-<span class="sourceLineNo">1637</span>          }<a name="line.1637"></a>
-<span class="sourceLineNo">1638</span>        } else {<a name="line.1638"></a>
-<span class="sourceLineNo">1639</span>          store.update(procedure);<a name="line.1639"></a>
-<span class="sourceLineNo">1640</span>        }<a name="line.1640"></a>
-<span class="sourceLineNo">1641</span>      } else {<a name="line.1641"></a>
-<span class="sourceLineNo">1642</span>        store.update(procedure);<a name="line.1642"></a>
-<span class="sourceLineNo">1643</span>      }<a name="line.1643"></a>
-<span class="sourceLineNo">1644</span>    }<a name="line.1644"></a>
-<span class="sourceLineNo">1645</span>  }<a name="line.1645"></a>
-<span class="sourceLineNo">1646</span><a name="line.1646"></a>
-<span class="sourceLineNo">1647</span>  private void handleInterruptedException(final Procedure proc, final InterruptedException e) {<a name="line.1647"></a>
-<span class="sourceLineNo">1648</span>    if (LOG.isTraceEnabled()) {<a name="line.1648"></a>
-<span class="sourceLineNo">1649</span>      LOG.trace("Interrupt during " + proc + ". suspend and retry it later.", e);<a name="line.1649"></a>
-<span class="sourceLineNo">1650</span>    }<a name="line.1650"></a>
-<span class="sourceLineNo">1651</span><a name="line.1651"></a>
-<span class="sourceLineNo">1652</span>    // NOTE: We don't call Thread.currentThread().interrupt()<a name="line.1652"></a>
-<span class="sourceLineNo">1653</span>    // because otherwise all the subsequent calls e.g. Thread.sleep() will throw<a name="line.1653"></a>
-<span class="sourceLineNo">1654</span>    // the InterruptedException. If the master is going down, we will be notified<a name="line.1654"></a>
-<span class="sourceLineNo">1655</span>    // and the executor/store will be stopped.<a name="line.1655"></a>
-<span class="sourceLineNo">1656</span>    // (The interrupted procedure will be retried on the next run)<a name="line.1656"></a>
-<span class="sourceLineNo">1657</span>  }<a name="line.1657"></a>
-<span class="sourceLineNo">1658</span><a name="line.1658"></a>
-<span class="sourceLineNo">1659</span>  private void execCompletionCleanup(final Procedure proc) {<a name="line.1659"></a>
-<span class="sourceLineNo">1660</span>    final TEnvironment env = getEnvironment();<a name="line.1660"></a>
-<span class="sourceLineNo">1661</span>    if (proc.holdLock(env) &amp;&amp; proc.hasLock(env)) {<a name="line.1661"></a>
-<span class="sourceLineNo">1662</span>      releaseLock(proc, true);<a name="line.1662"></a>
-<span class="sourceLineNo">1663</span>    }<a name="line.1663"></a>
-<span class="sourceLineNo">1664</span>    try {<a name="line.1664"></a>
-<span class="sourceLineNo">1665</span>      proc.completionCleanup(env);<a name="line.1665"></a>
-<span class="sourceLineNo">1666</span>    } catch (Throwable e) {<a name="line.1666"></a>
-<span class="sourceLineNo">1667</span>      // Catch NullPointerExceptions or similar errors...<a name="line.1667"></a>
-<span class="sourceLineNo">1668</span>      LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, e);<a name="line.1668"></a>
-<span class="sourceLineNo">1669</span>    }<a name="line.1669"></a>
-<span class="sourceLineNo">1670</span>  }<a name="line.1670"></a>
-<span class="sourceLineNo">1671</span><a name="line.1671"></a>
-<span class="sourceLineNo">1672</span>  private void procedureFinished(final Procedure proc) {<a name="line.1672"></a>
-<span class="sourceLineNo">1673</span>    // call the procedure completion cleanup handler<a name="line.1673"></a>
-<span class="sourceLineNo">1674</span>    execCompletionCleanup(proc);<a name="line.1674"></a>
-<span class="sourceLineNo">1675</span><a name="line.1675"></a>
-<span class="sourceLineNo">1676</span>    CompletedProcedureRetainer retainer = new CompletedProcedureRetainer(proc);<a name="line.1676"></a>
-<span class="sourceLineNo">1677</span><a name="line.1677"></a>
-<span class="sourceLineNo">1678</span>    // update the executor internal state maps<a name="line.1678"></a>
-<span class="sourceLineNo">1679</span>    if (!proc.shouldWaitClientAck(getEnvironment())) {<a name="line.1679"></a>
-<span class="sourceLineNo">1680</span>      retainer.setClientAckTime(0);<a name="line.1680"></a>
-<span class="sourceLineNo">1681</span>    }<a name="line.1681"></a>
-<span class="sourceLineNo">1682</span><a name="line.1682"></a>
-<span class="sourceLineNo">1683</span>    completed.put(proc.getProcId(), retainer);<a name="line.1683"></a>
-<span class="sourceLineNo">1684</span>    rollbackStack.remove(proc.getProcId());<a name="line.1684"></a>
-<span class="sourceLineNo">1685</span>    procedures.remove(proc.getProcId());<a name="line.1685"></a>
-<span class="sourceLineNo">1686</span><a name="line.1686"></a>
-<span class="sourceLineNo">1687</span>    // call the runnableSet completion cleanup handler<a name="line.1687"></a>
-<span class="sourceLineNo">1688</span>    try {<a name="line.1688"></a>
-<span class="sourceLineNo">1689</span>      scheduler.completionCleanup(proc);<a name="line.1689"></a>
-<span class="sourceLineNo">1690</span>    } catch (Throwable e) {<a name="line.1690"></a>
-<span class="sourceLineNo">1691</span>      // Catch NullPointerExceptions or similar errors...<a name="line.1691"></a>
-<span class="sourceLineNo">1692</span>      LOG.error("CODE-BUG: uncatched runtime exception for completion cleanup: " + proc, e);<a name="line.1692"></a>
-<span class="sourceLineNo">1693</span>    }<a name="line.1693"></a>
-<span class="sourceLineNo">1694</span><a name="line.1694"></a>
-<span class="sourceLineNo">1695</span>    // Notify the listeners<a name="line.1695"></a>
-<span class="sourceLineNo">1696</span>    sendProcedureFinishedNotification(proc.getProcId());<a name="line.1696"></a>
-<span class="sourceLineNo">1697</span>  }<a name="line.1697"></a>
-<span class="sourceLineNo">1698</span><a name="line.1698"></a>
-<span class="sourceLineNo">1699</span>  RootProcedureState getProcStack(long rootProcId) {<a name="line.1699"></a>
-<span class="sourceLineNo">1700</span>    return rollbackStack.get(rootProcId);<a name="line.1700"></a>
-<span class="sourceLineNo">1701</span>  }<a name="line.1701"></a>
-<span class="sourceLineNo">1702</span><a name="line.1702"></a>
-<span class="sourceLineNo">1703</span>  // ==========================================================================<a name="line.1703"></a>
-<span class="sourceLineNo">1704</span>  //  Worker Thread<a name="line.1704"></a>
-<span class="sourceLineNo">1705</span>  // ==========================================================================<a name="line.1705"></a>
-<span class="sourceLineNo">1706</span>  private class WorkerThread extends StoppableThread {<a name="line.1706"></a>
-<span class="sourceLineNo">1707</span>    private final AtomicLong executionStartTime = new AtomicLong(Long.MAX_VALUE);<a name="line.1707"></a>
-<span class="sourceLineNo">1708</span>    private volatile Procedure&lt;?&gt; activeProcedure;<a name="line.1708"></a>
-<span class="sourceLineNo">1709</span><a name="line.1709"></a>
-<span class="sourceLineNo">1710</span>    public WorkerThread(ThreadGroup group) {<a name="line.1710"></a>
-<span class="sourceLineNo">1711</span>      this(group, "PEWorker-");<a name="line.1711"></a>
-<span class="sourceLineNo">1712</span>    }<a name="line.1712"></a>
-<span class="sourceLineNo">1713</span><a name="line.1713"></a>
-<span class="sourceLineNo">1714</span>    protected WorkerThread(ThreadGroup group, String prefix) {<a name="line.1714"></a>
-<span class="sourceLineNo">1715</span>      super(group, prefix + workerId.incrementAndGet());<a name="line.1715"></a>
-<span class="sourceLineNo">1716</span>      setDaemon(true);<a name="line.1716"></a>
-<span class="sourceLineNo">1717</span>    }<a name="line.1717"></a>
-<span class="sourceLineNo">1718</span><a name="line.1718"></a>
-<span class="sourceLineNo">1719</span>    @Override<a name="line.1719"></a>
-<span class="sourceLineNo">1720</span>    public void sendStopSignal() {<a name="line.1720"></a>
-<span class="sourceLineNo">1721</span>      scheduler.signalAll();<a name="line.1721"></a>
-<span class="sourceLineNo">1722</span>    }<a name="line.1722"></a>
-<span class="sourceLineNo">1723</span><a name="line.1723"></a>
-<span class="sourceLineNo">1724</span>    @Override<a name="line.1724"></a>
-<span class="sourceLineNo">1725</span>    public void run() {<a name="line.1725"></a>
-<span class="sourceLineNo">1726</span>      long lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1726"></a>
-<span class="sourceLineNo">1727</span>      try {<a name="line.1727"></a>
-<span class="sourceLineNo">1728</span>        while (isRunning() &amp;&amp; keepAlive(lastUpdate)) {<a name="line.1728"></a>
-<span class="sourceLineNo">1729</span>          Procedure&lt;?&gt; proc = scheduler.poll(keepAliveTime, TimeUnit.MILLISECONDS);<a name="line.1729"></a>
-<span class="sourceLineNo">1730</span>          if (proc == null) {<a name="line.1730"></a>
-<span class="sourceLineNo">1731</span>            continue;<a name="line.1731"></a>
-<span class="sourceLineNo">1732</span>          }<a name="line.1732"></a>
-<span class="sourceLineNo">1733</span>          this.activeProcedure = proc;<a name="line.1733"></a>
-<span class="sourceLineNo">1734</span>          int activeCount = activeExecutorCount.incrementAndGet();<a name="line.1734"></a>
-<span class="sourceLineNo">1735</span>          int runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1735"></a>
-<span class="sourceLineNo">1736</span>          LOG.trace("Execute pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1736"></a>
-<span class="sourceLineNo">1737</span>            runningCount, activeCount);<a name="line.1737"></a>
-<span class="sourceLineNo">1738</span>          executionStartTime.set(EnvironmentEdgeManager.currentTime());<a name="line.1738"></a>
-<span class="sourceLineNo">1739</span>          try {<a name="line.1739"></a>
-<span class="sourceLineNo">1740</span>            executeProcedure(proc);<a name="line.1740"></a>
-<span class="sourceLineNo">1741</span>          } catch (AssertionError e) {<a name="line.1741"></a>
-<span class="sourceLineNo">1742</span>            LOG.info("ASSERT pid=" + proc.getProcId(), e);<a name="line.1742"></a>
-<span class="sourceLineNo">1743</span>            throw e;<a name="line.1743"></a>
-<span class="sourceLineNo">1744</span>          } finally {<a name="line.1744"></a>
-<span class="sourceLineNo">1745</span>            activeCount = activeExecutorCount.decrementAndGet();<a name="line.1745"></a>
-<span class="sourceLineNo">1746</span>            runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1746"></a>
-<span class="sourceLineNo">1747</span>            LOG.trace("Halt pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1747"></a>
-<span class="sourceLineNo">1748</span>              runningCount, activeCount);<a name="line.1748"></a>
-<span class="sourceLineNo">1749</span>            this.activeProcedure = null;<a name="line.1749"></a>
-<span class="sourceLineNo">1750</span>            lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1750"></a>
-<span class="sourceLineNo">1751</span>            executionStartTime.set(Long.MAX_VALUE);<a name="line.1751"></a>
-<span class="sourceLineNo">1752</span>          }<a name="line.1752"></a>
-<span class="sourceLineNo">1753</span>        }<a name="line.1753"></a>
-<span class="sourceLineNo">1754</span>      } catch (Throwable t) {<a name="line.1754"></a>
-<span class="sourceLineNo">1755</span>        LOG.warn("Worker terminating UNNATURALLY {}", this.activeProcedure, t);<a name="line.1755"></a>
-<span class="sourceLineNo">1756</span>      } finally {<a name="line.1756"></a>
-<span class="sourceLineNo">1757</span>        LOG.trace("Worker terminated.");<a name="line.1757"></a>
-<span class="sourceLineNo">1758</span>      }<a name="line.1758"></a>
-<span class="sourceLineNo">1759</span>      workerThreads.remove(this);<a name="line.1759"></a>
-<span class="sourceLineNo">1760</span>    }<a name="line.1760"></a>
-<span class="sourceLineNo">1761</span><a name="line.1761"></a>
-<span class="sourceLineNo">1762</span>    @Override<a name="line.1762"></a>
-<span class="sourceLineNo">1763</span>    public String toString() {<a name="line.1763"></a>
-<span class="sourceLineNo">1764</span>      Procedure&lt;?&gt; p = this.activeProcedure;<a name="line.1764"></a>
-<span class="sourceLineNo">1765</span>      return getName() + "(pid=" + (p == null? Procedure.NO_PROC_ID: p.getProcId() + ")");<a name="line.1765"></a>
-<span class="sourceLineNo">1766</span>    }<a name="line.1766"></a>
-<span class="sourceLineNo">1767</span><a name="line.1767"></a>
-<span class="sourceLineNo">1768</span>    /**<a name="line.1768"></a>
-<span class="sourceLineNo">1769</span>     * @return the time since the current procedure is running<a name="line.1769"></a>
-<span class="sourceLineNo">1770</span>     */<a name="line.1770"></a>
-<span class="sourceLineNo">1771</span>    public long getCurrentRunTime() {<a name="line.1771"></a>
-<span class="sourceLineNo">1772</span>      return EnvironmentEdgeManager.currentTime() - executionStartTime.get();<a name="line.1772"></a>
-<span class="sourceLineNo">1773</span>    }<a name="line.1773"></a>
-<span class="sourceLineNo">1774</span><a name="line.1774"></a>
-<span class="sourceLineNo">1775</span>    // core worker never timeout<a name="line.1775"></a>
-<span class="sourceLineNo">1776</span>    protected boolean keepAlive(long lastUpdate) {<a name="line.1776"></a>
-<span class="sourceLineNo">1777</span>      return true;<a name="line.1777"></a>
-<span class="sourceLineNo">1778</span>    }<a name="line.1778"></a>
-<span class="sourceLineNo">1779</span>  }<a name="line.1779"></a>
-<span class="sourceLineNo">1780</span><a name="line.1780"></a>
-<span class="sourceLineNo">1781</span>  // A worker thread which can be added when core workers are stuck. Will timeout after<a name="line.1781"></a>
-<span class="sourceLineNo">1782</span>  // keepAliveTime if there is no procedure to run.<a name="line.1782"></a>
-<span class="sourceLineNo">1783</span>  private final class KeepAliveWorkerThread extends WorkerThread {<a name="line.1783"></a>
-<span class="sourceLineNo">1784</span><a name="line.1784"></a>
-<span class="sourceLineNo">1785</span>    public KeepAliveWorkerThread(ThreadGroup group) {<a name="line.1785"></a>
-<span class="sourceLineNo">1786</span>      super(group, "KeepAlivePEWorker-");<a name="line.1786"></a>
-<span class="sourceLineNo">1787</span>    }<a name="line.1787"></a>
-<span class="sourceLineNo">1788</span><a name="line.1788"></a>
-<span class="sourceLineNo">1789</span>    @Override<a name="line.1789"></a>
-<span class="sourceLineNo">1790</span>    protected boolean keepAlive(long lastUpdate) {<a name="line.1790"></a>
-<span class="sourceLineNo">1791</span>      return EnvironmentEdgeManager.currentTime() - lastUpdate &lt; keepAliveTime;<a name="line.1791"></a>
-<span class="sourceLineNo">1792</span>    }<a name="line.1792"></a>
-<span class="sourceLineNo">1793</span>  }<a name="line.1793"></a>
-<span class="sourceLineNo">1794</span><a name="line.1794"></a>
-<span class="sourceLineNo">1795</span>  // ----------------------------------------------------------------------------<a name="line.1795"></a>
-<span class="sourceLineNo">1796</span>  // TODO-MAYBE: Should we provide a InlineChore to notify the store with the<a name="line.1796"></a>
-<span class="sourceLineNo">1797</span>  // full set of procedures pending and completed to write a compacted<a name="line.1797"></a>
-<span class="sourceLineNo">1798</span>  // version of the log (in case is a log)?<a name="line.1798"></a>
-<span class="sourceLineNo">1799</span>  // In theory no, procedures are have a short life, so at some point the store<a name="line.1799"></a>
-<span class="sourceLineNo">1800</span>  // will have the tracker saying everything is in the last log.<a name="line.1800"></a>
-<span class="sourceLineNo">1801</span>  // ----------------------------------------------------------------------------<a name="line.1801"></a>
-<span class="sourceLineNo">1802</span><a name="line.1802"></a>
-<span class="sourceLineNo">1803</span>  private final class WorkerMonitor extends InlineChore {<a name="line.1803"></a>
-<span class="sourceLineNo">1804</span>    public static final String WORKER_MONITOR_INTERVAL_CONF_KEY =<a name="line.1804"></a>
-<span class="sourceLineNo">1805</span>        "hbase.procedure.worker.monitor.interval.msec";<a name="line.1805"></a>
-<span class="sourceLineNo">1806</span>    private static final int DEFAULT_WORKER_MONITOR_INTERVAL = 5000; // 5sec<a name="line.1806"></a>
-<span class="sourceLineNo">1807</span><a name="line.1807"></a>
-<span class="sourceLineNo">1808</span>    public static final String WORKER_STUCK_THRESHOLD_CONF_KEY =<a name="line.1808"></a>
-<span class="sourceLineNo">1809</span>        "hbase.procedure.worker.stuck.threshold.msec";<a name="line.1809"></a>
-<span class="sourceLineNo">1810</span>    private static final int DEFAULT_WORKER_STUCK_THRESHOLD = 10000; // 10sec<a name="line.1810"></a>
-<span class="sourceLineNo">1811</span><a name="line.1811"></a>
-<span class="sourceLineNo">1812</span>    public static final String WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY =<a name="line.1812"></a>
-<span class="sourceLineNo">1813</span>        "hbase.procedure.worker.add.stuck.percentage";<a name="line.1813"></a>
-<span class="sourceLineNo">1814</span>    private static final float DEFAULT_WORKER_ADD_STUCK_PERCENTAGE = 0.5f; // 50% stuck<a name="line.1814"></a>
-<span class="sourceLineNo">1815</span><a name="line.1815"></a>
-<span class="sourceLineNo">1816</span>    private float addWorkerStuckPercentage = DEFAULT_WORKER_ADD_STUCK_PERCENTAGE;<a name="line.1816"></a>
-<span class="sourceLineNo">1817</span>    private int timeoutInterval = DEFAULT_WORKER_MONITOR_INTERVAL;<a name="line.1817"></a>
-<span class="sourceLineNo">1818</span>    private int stuckThreshold = DEFAULT_WORKER_STUCK_THRESHOLD;<a name="line.1818"></a>
-<span class="sourceLineNo">1819</span><a name="line.1819"></a>
-<span class="sourceLineNo">1820</span>    public WorkerMonitor() {<a name="line.1820"></a>
-<span class="sourceLineNo">1821</span>      refreshConfig();<a name="line.1821"></a>
-<span class="sourceLineNo">1822</span>    }<a name="line.1822"></a>
-<span class="sourceLineNo">1823</span><a name="line.1823"></a>
-<span class="sourceLineNo">1824</span>    @Override<a name="line.1824"></a>
-<span class="sourceLineNo">1825</span>    public void run() {<a name="line.1825"></a>
-<span class="sourceLineNo">1826</span>      final int stuckCount = checkForStuckWorkers();<a name="line.1826"></a>
-<span class="sourceLineNo">1827</span>      checkThreadCount(stuckCount);<a name="line.1827"></a>
-<span class="sourceLineNo">1828</span><a name="line.1828"></a>
-<span class="sourceLineNo">1829</span>      // refresh interval (poor man dynamic conf update)<a name="line.1829"></a>
-<span class="sourceLineNo">1830</span>      refreshConfig();<a name="line.1830"></a>
-<span class="sourceLineNo">1831</span>    }<a name="line.1831"></a>
-<span class="sourceLineNo">1832</span><a name="line.1832"></a>
-<span class="sourceLineNo">1833</span>    private int checkForStuckWorkers() {<a name="line.1833"></a>
-<span class="sourceLineNo">1834</span>      // check if any of the worker is stuck<a name="line.1834"></a>
-<span class="sourceLineNo">1835</span>      int stuckCount = 0;<a name="line.1835"></a>
-<span class="sourceLineNo">1836</span>      for (WorkerThread worker : workerThreads) {<a name="line.1836"></a>
-<span class="sourceLineNo">1837</span>        if (worker.getCurrentRunTime() &lt; stuckThreshold) {<a name="line.1837"></a>
-<span class="sourceLineNo">1838</span>          continue;<a name="line.1838"></a>
-<span class="sourceLineNo">1839</span>        }<a name="line.1839"></a>
-<span class="sourceLineNo">1840</span><a name="line.1840"></a>
-<span class="sourceLineNo">1841</span>        // WARN the worker is stuck<a name="line.1841"></a>
-<span class="sourceLineNo">1842</span>        stuckCount++;<a name="line.1842"></a>
-<span class="sourceLineNo">1843</span>        LOG.warn("Worker stuck {} run time {}", worker,<a name="line.1843"></a>
-<span class="sourceLineNo">1844</span>          StringUtils.humanTimeDiff(worker.getCurrentRunTime()));<a name="line.1844"></a>
-<span class="sourceLineNo">1845</span>      }<a name="line.1845"></a>
-<span class="sourceLineNo">1846</span>      return stuckCount;<a name="line.1846"></a>
-<span class="sourceLineNo">1847</span>    }<a name="line.1847"></a>
-<span class="sourceLineNo">1848</span><a name="line.1848"></a>
-<span class="sourceLineNo">1849</span>    private void checkThreadCount(final int stuckCount) {<a name="line.1849"></a>
-<span class="sourceLineNo">1850</span>      // nothing to do if there are no runnable tasks<a name="line.1850"></a>
-<span class="sourceLineNo">1851</span>      if (stuckCount &lt; 1 || !scheduler.hasRunnables()) {<a name="line.1851"></a>
-<span class="sourceLineNo">1852</span>        return;<a name="line.1852"></a>
-<span class="sourceLineNo">1853</span>      }<a name="line.1853"></a>
-<span class="sourceLineNo">1854</span><a name="line.1854"></a>
-<span class="sourceLineNo">1855</span>      // add a new thread if the worker stuck percentage exceed the threshold limit<a name="line.1855"></a>
-<span class="sourceLineNo">1856</span>      // and every handler is active.<a name="line.1856"></a>
-<span class="sourceLineNo">1857</span>      final float stuckPerc = ((float) stuckCount) / workerThreads.size();<a name="line.1857"></a>
-<span class="sourceLineNo">1858</span>      // let's add new worker thread more aggressively, as they will timeout finally if there is no<a name="line.1858"></a>
-<span class="sourceLineNo">1859</span>      // work to do.<a name="line.1859"></a>
-<span class="sourceLineNo">1860</span>      if (stuckPerc &gt;= addWorkerStuckPercentage &amp;&amp; workerThreads.size() &lt; maxPoolSize) {<a name="line.1860"></a>
-<span class="sourceLineNo">1861</span>        final KeepAliveWorkerThread worker = new KeepAliveWorkerThread(threadGroup);<a name="line.1861"></a>
-<span class="sourceLineNo">1862</span>        workerThreads.add(worker);<a name="line.1862"></a>
-<span class="sourceLineNo">1863</span>        worker.start();<a name="line.1863"></a>
-<span class="sourceLineNo">1864</span>        LOG.debug("Added new worker thread {}", worker);<a name="line.1864"></a>
-<span class="sourceLineNo">1865</span>      }<a name="line.1865"></a>
-<span class="sourceLineNo">1866</span>    }<a name="line.1866"></a>
-<span class="sourceLineNo">1867</span><a name="line.1867"></a>
-<span class="sourceLineNo">1868</span>    private void refreshConfig() {<a name="line.1868"></a>
-<span class="sourceLineNo">1869</span>      addWorkerStuckPercentage = conf.getFloat(WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY,<a name="line.1869"></a>
-<span class="sourceLineNo">1870</span>          DEFAULT_WORKER_ADD_STUCK_PERCENTAGE);<a name="line.1870"></a>
-<span class="sourceLineNo">1871</span>      timeoutInterval = conf.getInt(WORKER_MONITOR_INTERVAL_CONF_KEY,<a name="line.1871"></a>
-<span class="sourceLineNo">1872</span>        DEFAULT_WORKER_MONITOR_INTERVAL);<a name="line.1872"></a>
-<span class="sourceLineNo">1873</span>      stuckThreshold = conf.getInt(WORKER_STUCK_THRESHOLD_CONF_KEY,<a name="line.1873"></a>
-<span class="sourceLineNo">1874</span>        DEFAULT_WORKER_STUCK_THRESHOLD);<a name="line.1874"></a>
-<span class="sourceLineNo">1875</span>    }<a name="line.1875"></a>
-<span class="sourceLineNo">1876</span><a name="line.1876"></a>
-<span class="sourceLineNo">1877</span>    @Override<a name="line.1877"></a>
-<span class="sourceLineNo">1878</span>    public int getTimeoutInterval() {<a name="line.1878"></a>
-<span class="sourceLineNo">1879</span>      return timeoutInterval;<a name="line.1879"></a>
-<span class="sourceLineNo">1880</span>    }<a name="line.1880"></a>
-<span class="sourceLineNo">1881</span>  }<a name="line.1881"></a>
-<span class="sourceLineNo">1882</span>}<a name="line.1882"></a>
+<span class="sourceLineNo">1430</span>    Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE,<a name="line.1430"></a>
+<span class="sourceLineNo">1431</span>        procedure.toString());<a name="line.1431"></a>
+<span class="sourceLineNo">1432</span><a name="line.1432"></a>
+<span class="sourceLineNo">1433</span>    // Procedures can suspend themselves. They skip out by throwing a ProcedureSuspendedException.<a name="line.1433"></a>
+<span class="sourceLineNo">1434</span>    // The exception is caught below and then we hurry to the exit without disturbing state. The<a name="line.1434"></a>
+<span class="sourceLineNo">1435</span>    // idea is that the processing of this procedure will be unsuspended later by an external event<a name="line.1435"></a>
+<span class="sourceLineNo">1436</span>    // such the report of a region open. TODO: Currently, its possible for two worker threads<a name="line.1436"></a>
+<span class="sourceLineNo">1437</span>    // to be working on the same procedure concurrently (locking in procedures is NOT about<a name="line.1437"></a>
+<span class="sourceLineNo">1438</span>    // concurrency but about tying an entity to a procedure; i.e. a region to a particular<a name="line.1438"></a>
+<span class="sourceLineNo">1439</span>    // procedure instance). This can make for issues if both threads are changing state.<a name="line.1439"></a>
+<span class="sourceLineNo">1440</span>    // See env.getProcedureScheduler().wakeEvent(regionNode.getProcedureEvent());<a name="line.1440"></a>
+<span class="sourceLineNo">1441</span>    // in RegionTransitionProcedure#reportTransition for example of Procedure putting<a name="line.1441"></a>
+<span class="sourceLineNo">1442</span>    // itself back on the scheduler making it possible for two threads running against<a name="line.1442"></a>
+<span class="sourceLineNo">1443</span>    // the one Procedure. Might be ok if they are both doing different, idempotent sections.<a name="line.1443"></a>
+<span class="sourceLineNo">1444</span>    boolean suspended = false;<a name="line.1444"></a>
+<span class="sourceLineNo">1445</span><a name="line.1445"></a>
+<span class="sourceLineNo">1446</span>    // Whether to 're-' -execute; run through the loop again.<a name="line.1446"></a>
+<span class="sourceLineNo">1447</span>    boolean reExecute = false;<a name="line.1447"></a>
+<span class="sourceLineNo">1448</span><a name="line.1448"></a>
+<span class="sourceLineNo">1449</span>    Procedure&lt;TEnvironment&gt;[] subprocs = null;<a name="line.1449"></a>
+<span class="sourceLineNo">1450</span>    do {<a name="line.1450"></a>
+<span class="sourceLineNo">1451</span>      reExecute = false;<a name="line.1451"></a>
+<span class="sourceLineNo">1452</span>      try {<a name="line.1452"></a>
+<span class="sourceLineNo">1453</span>        subprocs = procedure.doExecute(getEnvironment());<a name="line.1453"></a>
+<span class="sourceLineNo">1454</span>        if (subprocs != null &amp;&amp; subprocs.length == 0) {<a name="line.1454"></a>
+<span class="sourceLineNo">1455</span>          subprocs = null;<a name="line.1455"></a>
+<span class="sourceLineNo">1456</span>        }<a name="line.1456"></a>
+<span class="sourceLineNo">1457</span>      } catch (ProcedureSuspendedException e) {<a name="line.1457"></a>
+<span class="sourceLineNo">1458</span>        if (LOG.isTraceEnabled()) {<a name="line.1458"></a>
+<span class="sourceLineNo">1459</span>          LOG.trace("Suspend " + procedure);<a name="line.1459"></a>
+<span class="sourceLineNo">1460</span>        }<a name="line.1460"></a>
+<span class="sourceLineNo">1461</span>        suspended = true;<a name="line.1461"></a>
+<span class="sourceLineNo">1462</span>      } catch (ProcedureYieldException e) {<a name="line.1462"></a>
+<span class="sourceLineNo">1463</span>        if (LOG.isTraceEnabled()) {<a name="line.1463"></a>
+<span class="sourceLineNo">1464</span>          LOG.trace("Yield " + procedure + ": " + e.getMessage(), e);<a name="line.1464"></a>
+<span class="sourceLineNo">1465</span>        }<a name="line.1465"></a>
+<span class="sourceLineNo">1466</span>        scheduler.yield(procedure);<a name="line.1466"></a>
+<span class="sourceLineNo">1467</span>        return;<a name="line.1467"></a>
+<span class="sourceLineNo">1468</span>      } catch (InterruptedException e) {<a name="line.1468"></a>
+<span class="sourceLineNo">1469</span>        if (LOG.isTraceEnabled()) {<a name="line.1469"></a>
+<span class="sourceLineNo">1470</span>          LOG.trace("Yield interrupt " + procedure + ": " + e.getMessage(), e);<a name="line.1470"></a>
+<span class="sourceLineNo">1471</span>        }<a name="line.1471"></a>
+<span class="sourceLineNo">1472</span>        handleInterruptedException(procedure, e);<a name="line.1472"></a>
+<span class="sourceLineNo">1473</span>        scheduler.yield(procedure);<a name="line.1473"></a>
+<span class="sourceLineNo">1474</span>        return;<a name="line.1474"></a>
+<span class="sourceLineNo">1475</span>      } catch (Throwable e) {<a name="line.1475"></a>
+<span class="sourceLineNo">1476</span>        // Catch NullPointerExceptions or similar errors...<a name="line.1476"></a>
+<span class="sourceLineNo">1477</span>        String msg = "CODE-BUG: Uncaught runtime exception: " + procedure;<a name="line.1477"></a>
+<span class="sourceLineNo">1478</span>        LOG.error(msg, e);<a name="line.1478"></a>
+<span class="sourceLineNo">1479</span>        procedure.setFailure(new RemoteProcedureException(msg, e));<a name="line.1479"></a>
+<span class="sourceLineNo">1480</span>      }<a name="line.1480"></a>
+<span class="sourceLineNo">1481</span><a name="line.1481"></a>
+<span class="sourceLineNo">1482</span>      if (!procedure.isFailed()) {<a name="line.1482"></a>
+<span class="sourceLineNo">1483</span>        if (subprocs != null) {<a name="line.1483"></a>
+<span class="sourceLineNo">1484</span>          if (subprocs.length == 1 &amp;&amp; subprocs[0] == procedure) {<a name="line.1484"></a>
+<span class="sourceLineNo">1485</span>            // Procedure returned itself. Quick-shortcut for a state machine-like procedure;<a name="line.1485"></a>
+<span class="sourceLineNo">1486</span>            // i.e. we go around this loop again rather than go back out on the scheduler queue.<a name="line.1486"></a>
+<span class="sourceLineNo">1487</span>            subprocs = null;<a name="line.1487"></a>
+<span class="sourceLineNo">1488</span>            reExecute = true;<a name="line.1488"></a>
+<span class="sourceLineNo">1489</span>            if (LOG.isTraceEnabled()) {<a name="line.1489"></a>
+<span class="sourceLineNo">1490</span>              LOG.trace("Short-circuit to next step on pid=" + procedure.getProcId());<a name="line.1490"></a>
+<span class="sourceLineNo">1491</span>            }<a name="line.1491"></a>
+<span class="sourceLineNo">1492</span>          } else {<a name="line.1492"></a>
+<span class="sourceLineNo">1493</span>            // Yield the current procedure, and make the subprocedure runnable<a name="line.1493"></a>
+<span class="sourceLineNo">1494</span>            // subprocs may come back 'null'.<a name="line.1494"></a>
+<span class="sourceLineNo">1495</span>            subprocs = initializeChildren(procStack, procedure, subprocs);<a name="line.1495"></a>
+<span class="sourceLineNo">1496</span>            LOG.info("Initialized subprocedures=" +<a name="line.1496"></a>
+<span class="sourceLineNo">1497</span>              (subprocs == null? null:<a name="line.1497"></a>
+<span class="sourceLineNo">1498</span>                Stream.of(subprocs).map(e -&gt; "{" + e.toString() + "}").<a name="line.1498"></a>
+<span class="sourceLineNo">1499</span>                collect(Collectors.toList()).toString()));<a name="line.1499"></a>
+<span class="sourceLineNo">1500</span>          }<a name="line.1500"></a>
+<span class="sourceLineNo">1501</span>        } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {<a name="line.1501"></a>
+<span class="sourceLineNo">1502</span>          if (LOG.isTraceEnabled()) {<a name="line.1502"></a>
+<span class="sourceLineNo">1503</span>            LOG.trace("Added to timeoutExecutor " + procedure);<a name="line.1503"></a>
+<span class="sourceLineNo">1504</span>          }<a name="line.1504"></a>
+<span class="sourceLineNo">1505</span>          timeoutExecutor.add(procedure);<a name="line.1505"></a>
+<span class="sourceLineNo">1506</span>        } else if (!suspended) {<a name="line.1506"></a>
+<span class="sourceLineNo">1507</span>          // No subtask, so we are done<a name="line.1507"></a>
+<span class="sourceLineNo">1508</span>          procedure.setState(ProcedureState.SUCCESS);<a name="line.1508"></a>
+<span class="sourceLineNo">1509</span>        }<a name="line.1509"></a>
+<span class="sourceLineNo">1510</span>      }<a name="line.1510"></a>
+<span class="sourceLineNo">1511</span><a name="line.1511"></a>
+<span class="sourceLineNo">1512</span>      // Add the procedure to the stack<a name="line.1512"></a>
+<span class="sourceLineNo">1513</span>      procStack.addRollbackStep(procedure);<a name="line.1513"></a>
+<span class="sourceLineNo">1514</span><a name="line.1514"></a>
+<span class="sourceLineNo">1515</span>      // allows to kill the executor before something is stored to the wal.<a name="line.1515"></a>
+<span class="sourceLineNo">1516</span>      // useful to test the procedure recovery.<a name="line.1516"></a>
+<span class="sourceLineNo">1517</span>      if (testing != null &amp;&amp; testing.shouldKillBeforeStoreUpdate(suspended)) {<a name="line.1517"></a>
+<span class="sourceLineNo">1518</span>        LOG.debug("TESTING: Kill before store update: " + procedure);<a name="line.1518"></a>
+<span class="sourceLineNo">1519</span>        stop();<a name="line.1519"></a>
+<span class="sourceLineNo">1520</span>        return;<a name="line.1520"></a>
+<span class="sourceLineNo">1521</span>      }<a name="line.1521"></a>
+<span class="sourceLineNo">1522</span><a name="line.1522"></a>
+<span class="sourceLineNo">1523</span>      // TODO: The code here doesn't check if store is running before persisting to the store as<a name="line.1523"></a>
+<span class="sourceLineNo">1524</span>      // it relies on the method call below to throw RuntimeException to wind up the stack and<a name="line.1524"></a>
+<span class="sourceLineNo">1525</span>      // executor thread to stop. The statement following the method call below seems to check if<a name="line.1525"></a>
+<span class="sourceLineNo">1526</span>      // store is not running, to prevent scheduling children procedures, re-execution or yield<a name="line.1526"></a>
+<span class="sourceLineNo">1527</span>      // of this procedure. This may need more scrutiny and subsequent cleanup in future<a name="line.1527"></a>
+<span class="sourceLineNo">1528</span>      //<a name="line.1528"></a>
+<span class="sourceLineNo">1529</span>      // Commit the transaction even if a suspend (state may have changed). Note this append<a name="line.1529"></a>
+<span class="sourceLineNo">1530</span>      // can take a bunch of time to complete.<a name="line.1530"></a>
+<span class="sourceLineNo">1531</span>      updateStoreOnExec(procStack, procedure, subprocs);<a name="line.1531"></a>
+<span class="sourceLineNo">1532</span><a name="line.1532"></a>
+<span class="sourceLineNo">1533</span>      // if the store is not running we are aborting<a name="line.1533"></a>
+<span class="sourceLineNo">1534</span>      if (!store.isRunning()) return;<a name="line.1534"></a>
+<span class="sourceLineNo">1535</span>      // if the procedure is kind enough to pass the slot to someone else, yield<a name="line.1535"></a>
+<span class="sourceLineNo">1536</span>      if (procedure.isRunnable() &amp;&amp; !suspended &amp;&amp;<a name="line.1536"></a>
+<span class="sourceLineNo">1537</span>          procedure.isYieldAfterExecutionStep(getEnvironment())) {<a name="line.1537"></a>
+<span class="sourceLineNo">1538</span>        scheduler.yield(procedure);<a name="line.1538"></a>
+<span class="sourceLineNo">1539</span>        return;<a name="line.1539"></a>
+<span class="sourceLineNo">1540</span>      }<a name="line.1540"></a>
+<span class="sourceLineNo">1541</span><a name="line.1541"></a>
+<span class="sourceLineNo">1542</span>      assert (reExecute &amp;&amp; subprocs == null) || !reExecute;<a name="line.1542"></a>
+<span class="sourceLineNo">1543</span>    } while (reExecute);<a name="line.1543"></a>
+<span class="sourceLineNo">1544</span>    // Submit the new subprocedures<a name="line.1544"></a>
+<span class="sourceLineNo">1545</span>    if (subprocs != null &amp;&amp; !procedure.isFailed()) {<a name="line.1545"></a>
+<span class="sourceLineNo">1546</span>      submitChildrenProcedures(subprocs);<a name="line.1546"></a>
+<span class="sourceLineNo">1547</span>    }<a name="line.1547"></a>
+<span class="sourceLineNo">1548</span><a name="line.1548"></a>
+<span class="sourceLineNo">1549</span>    // if the procedure is complete and has a parent, count down the children latch.<a name="line.1549"></a>
+<span class="sourceLineNo">1550</span>    // If 'suspended', do nothing to change state -- let other threads handle unsuspend event.<a name="line.1550"></a>
+<span class="sourceLineNo">1551</span>    if (!suspended &amp;&amp; procedure.isFinished() &amp;&amp; procedure.hasParent()) {<a name="line.1551"></a>
+<span class="sourceLineNo">1552</span>      countDownChildren(procStack, procedure);<a name="line.1552"></a>
+<span class="sourceLineNo">1553</span>    }<a name="line.1553"></a>
+<span class="sourceLineNo">1554</span>  }<a name="line.1554"></a>
+<span class="sourceLineNo">1555</span><a name="line.1555"></a>
+<span class="sourceLineNo">1556</span>  private Procedure[] initializeChildren(final RootProcedureState procStack,<a name="line.1556"></a>
+<span class="sourceLineNo">1557</span>      final Procedure procedure, final Procedure[] subprocs) {<a name="line.1557"></a>
+<span class="sourceLineNo">1558</span>    assert subprocs != null : "expected subprocedures";<a name="line.1558"></a>
+<span class="sourceLineNo">1559</span>    final long rootProcId = getRootProcedureId(procedure);<a name="line.1559"></a>
+<span class="sourceLineNo">1560</span>    for (int i = 0; i &lt; subprocs.length; ++i) {<a name="line.1560"></a>
+<span class="sourceLineNo">1561</span>      final Procedure subproc = subprocs[i];<a name="line.1561"></a>
+<span class="sourceLineNo">1562</span>      if (subproc == null) {<a name="line.1562"></a>
+<span class="sourceLineNo">1563</span>        String msg = "subproc[" + i + "] is null, aborting the procedure";<a name="line.1563"></a>
+<span class="sourceLineNo">1564</span>        procedure.setFailure(new RemoteProcedureException(msg,<a name="line.1564"></a>
+<span class="sourceLineNo">1565</span>          new IllegalArgumentIOException(msg)));<a name="line.1565"></a>
+<span class="sourceLineNo">1566</span>        return null;<a name="line.1566"></a>
+<span class="sourceLineNo">1567</span>      }<a name="line.1567"></a>
+<span class="sourceLineNo">1568</span><a name="line.1568"></a>
+<span class="sourceLineNo">1569</span>      assert subproc.getState() == ProcedureState.INITIALIZING : subproc;<a name="line.1569"></a>
+<span class="sourceLineNo">1570</span>      subproc.setParentProcId(procedure.getProcId());<a name="line.1570"></a>
+<span class="sourceLineNo">1571</span>      subproc.setRootProcId(rootProcId);<a name="line.1571"></a>
+<span class="sourceLineNo">1572</span>      subproc.setProcId(nextProcId());<a name="line.1572"></a>
+<span class="sourceLineNo">1573</span>      procStack.addSubProcedure(subproc);<a name="line.1573"></a>
+<span class="sourceLineNo">1574</span>    }<a name="line.1574"></a>
+<span class="sourceLineNo">1575</span><a name="line.1575"></a>
+<span class="sourceLineNo">1576</span>    if (!procedure.isFailed()) {<a name="line.1576"></a>
+<span class="sourceLineNo">1577</span>      procedure.setChildrenLatch(subprocs.length);<a name="line.1577"></a>
+<span class="sourceLineNo">1578</span>      switch (procedure.getState()) {<a name="line.1578"></a>
+<span class="sourceLineNo">1579</span>        case RUNNABLE:<a name="line.1579"></a>
+<span class="sourceLineNo">1580</span>          procedure.setState(ProcedureState.WAITING);<a name="line.1580"></a>
+<span class="sourceLineNo">1581</span>          break;<a name="line.1581"></a>
+<span class="sourceLineNo">1582</span>        case WAITING_TIMEOUT:<a name="line.1582"></a>
+<span class="sourceLineNo">1583</span>          timeoutExecutor.add(procedure);<a name="line.1583"></a>
+<span class="sourceLineNo">1584</span>          break;<a name="line.1584"></a>
+<span class="sourceLineNo">1585</span>        default:<a name="line.1585"></a>
+<span class="sourceLineNo">1586</span>          break;<a name="line.1586"></a>
+<span class="sourceLineNo">1587</span>      }<a name="line.1587"></a>
+<span class="sourceLineNo">1588</span>    }<a name="line.1588"></a>
+<span class="sourceLineNo">1589</span>    return subprocs;<a name="line.1589"></a>
+<span class="sourceLineNo">1590</span>  }<a name="line.1590"></a>
+<span class="sourceLineNo">1591</span><a name="line.1591"></a>
+<span class="sourceLineNo">1592</span>  private void submitChildrenProcedures(final Procedure[] subprocs) {<a name="line.1592"></a>
+<span class="sourceLineNo">1593</span>    // Record every child in the procedures map, then push it onto the front of the scheduler.<a name="line.1593"></a>
+<span class="sourceLineNo">1594</span>    for (final Procedure child : subprocs) {<a name="line.1594"></a>
+<span class="sourceLineNo">1595</span>      child.updateMetricsOnSubmit(getEnvironment());<a name="line.1595"></a>
+<span class="sourceLineNo">1596</span>      assert !procedures.containsKey(child.getProcId());<a name="line.1596"></a>
+<span class="sourceLineNo">1597</span>      procedures.put(child.getProcId(), child);<a name="line.1597"></a>
+<span class="sourceLineNo">1598</span>      scheduler.addFront(child);<a name="line.1598"></a>
+<span class="sourceLineNo">1599</span>    }<a name="line.1599"></a>
+<span class="sourceLineNo">1600</span>  }<a name="line.1600"></a>
+<span class="sourceLineNo">1601</span><a name="line.1601"></a>
+<span class="sourceLineNo">1602</span>  private void countDownChildren(final RootProcedureState procStack, final Procedure procedure) {<a name="line.1602"></a>
+<span class="sourceLineNo">1603</span>    final Procedure parentProc = procedures.get(procedure.getParentProcId());<a name="line.1603"></a>
+<span class="sourceLineNo">1604</span>    if (parentProc == null) {<a name="line.1604"></a>
+<span class="sourceLineNo">1605</span>      // No parent in the procedures map: only expected while the stack is rolling back.<a name="line.1605"></a>
+<span class="sourceLineNo">1606</span>      assert procStack.isRollingback();<a name="line.1606"></a>
+<span class="sourceLineNo">1607</span>      return;<a name="line.1607"></a>
+<span class="sourceLineNo">1608</span>    }<a name="line.1608"></a>
+<span class="sourceLineNo">1609</span><a name="line.1609"></a>
+<span class="sourceLineNo">1610</span>    // Only the last child to finish makes the parent runnable; any other child just leaves.<a name="line.1610"></a>
+<span class="sourceLineNo">1611</span>    if (!parentProc.tryRunnable()) {<a name="line.1611"></a>
+<span class="sourceLineNo">1612</span>      return;<a name="line.1612"></a>
+<span class="sourceLineNo">1613</span>    }<a name="line.1613"></a>
+<span class="sourceLineNo">1614</span>    // All children have completed: update the parent in the store, then move it to the queue front.<a name="line.1614"></a>
+<span class="sourceLineNo">1615</span>    store.update(parentProc);<a name="line.1615"></a>
+<span class="sourceLineNo">1616</span>    scheduler.addFront(parentProc);<a name="line.1616"></a>
+<span class="sourceLineNo">1617</span>    LOG.info("Finished subprocedure(s) of " + parentProc + "; resume parent processing.");<a name="line.1617"></a>
+<span class="sourceLineNo">1618</span>  }<a name="line.1618"></a>
+<span class="sourceLineNo">1619</span><a name="line.1619"></a>
+<span class="sourceLineNo">1620</span>  private void updateStoreOnExec(final RootProcedureState procStack,<a name="line.1620"></a>
+<span class="sourceLineNo">1621</span>      final Procedure procedure, final Procedure[] subprocs) {<a name="line.1621"></a>
+<span class="sourceLineNo">1622</span>    if (subprocs != null &amp;&amp; !procedure.isFailed()) {<a name="line.1622"></a>
+<span class="sourceLineNo">1623</span>      if (LOG.isTraceEnabled()) {<a name="line.1623"></a>
+<span class="sourceLineNo">1624</span>        LOG.trace("Stored " + procedure + ", children " + Arrays.toString(subprocs));<a name="line.1624"></a>
+<span class="sourceLineNo">1625</span>      }<a name="line.1625"></a>
+<span class="sourceLineNo">1626</span>      store.insert(procedure, subprocs);<a name="line.1626"></a>
+<span class="sourceLineNo">1627</span>    } else {<a name="line.1627"></a>
+<span class="sourceLineNo">1628</span>      if (LOG.isTraceEnabled()) {<a name="line.1628"></a>
+<span class="sourceLineNo">1629</span>        LOG.trace("Store update " + procedure);<a name="line.1629"></a>
+<span class="sourceLineNo">1630</span>      }<a name="line.1630"></a>
+<span class="sourceLineNo">1631</span>      if (procedure.isFinished() &amp;&amp; !procedure.hasParent()) {<a name="line.1631"></a>
+<span class="sourceLineNo">1632</span>        // remove child procedures<a name="line.1632"></a>
+<span class="sourceLineNo">1633</span>        final long[] childProcIds = procStack.getSubprocedureIds();<a name="line.1633"></a>
+<span class="sourceLineNo">1634</span>        if (childProcIds != null) {<a name="line.1634"></a>
+<span class="sourceLineNo">1635</span>          store.delete(procedure, childProcIds);<a name="line.1635"></a>
+<span class="sourceLineNo">1636</span>          for (int i = 0; i &lt; childProcIds.length; ++i) {<a name="line.1636"></a>
+<span class="sourceLineNo">1637</span>            procedures.remove(childProcIds[i]);<a name="line.1637"></a>
+<span class="sourceLineNo">1638</span>          }<a name="line.1638"></a>
+<span class="sourceLineNo">1639</span>        } else {<a name="line.1639"></a>
+<span class="sourceLineNo">1640</span>          store.update(procedure);<a name="line.1640"></a>
+<span class="sourceLineNo">1641</span>        }<a name="line.1641"></a>
+<span class="sourceLineNo">1642</span>      } else {<a name="line.1642"></a>
+<span class="sourceLineNo">1643</span>        store.update(procedure);<a name="line.1643"></a>
+<span class="sourceLineNo">1644</span>      }<a name="line.1644"></a>
+<span class="sourceLineNo">1645</span>    }<a name="line.1645"></a>
+<span class="sourceLineNo">1646</span>  }<a name="line.1646"></a>
+<span class="sourceLineNo">1647</span><a name="line.1647"></a>
+<span class="sourceLineNo">1648</span>  private void handleInterruptedException(final Procedure proc, final InterruptedException e) {<a name="line.1648"></a>
+<span class="sourceLineNo">1649</span>    // NOTE: The interrupt flag is deliberately NOT restored with<a name="line.1649"></a>
+<span class="sourceLineNo">1650</span>    // Thread.currentThread().interrupt(): otherwise all the subsequent calls,<a name="line.1650"></a>
+<span class="sourceLineNo">1651</span>    // e.g. Thread.sleep(), would throw InterruptedException. If the master is going<a name="line.1651"></a>
+<span class="sourceLineNo">1652</span>    // down, we will be notified and the executor/store will be stopped.<a name="line.1652"></a>
+<span class="sourceLineNo">1653</span>    // (The interrupted procedure will be retried on the next run.)<a name="line.1653"></a>
+<span class="sourceLineNo">1654</span>    if (!LOG.isTraceEnabled()) {<a name="line.1654"></a>
+<span class="sourceLineNo">1655</span>      return;<a name="line.1655"></a>
+<span class="sourceLineNo">1656</span>    }<a name="line.1656"></a>
+<span class="sourceLineNo">1657</span>    LOG.trace("Interrupt during " + proc + ". suspend and retry it later.", e);<a name="line.1657"></a>
+<span class="sourceLineNo">1658</span>  }<a name="line.1658"></a>
+<span class="sourceLineNo">1659</span><a name="line.1659"></a>
+<span class="sourceLineNo">1660</span>  private void execCompletionCleanup(final Procedure proc) {<a name="line.1660"></a>
+<span class="sourceLineNo">1661</span>    final TEnvironment env = getEnvironment();<a name="line.1661"></a>
+<span class="sourceLineNo">1662</span>    if (proc.holdLock(env) &amp;&amp; proc.hasLock(env)) {<a name="line.1662"></a>
+<span class="sourceLineNo">1663</span>      releaseLock(proc, true);<a name="line.1663"></a>
+<span class="sourceLineNo">1664</span>    }<a name="line.1664"></a>
+<span class="sourceLineNo">1665</span>    try {<a name="line.1665"></a>
+<span class="sourceLineNo">1666</span>      proc.completionCleanup(env);<a name="line.1666"></a>
+<span class="sourceLineNo">1667</span>    } catch (Throwable e) {<a name="line.1667"></a>
+<span class="sourceLineNo">1668</span>      // Catch NullPointerExceptions or similar errors...<a name="line.1668"></a>
+<span class="sourceLineNo">1669</span>      LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, e);<a name="line.1669"></a>
+<span class="sourceLineNo">1670</span>    }<a name="line.1670"></a>
+<span class="sourceLineNo">1671</span>  }<a name="line.1671"></a>
+<span class="sourceLineNo">1672</span><a name="line.1672"></a>
+<span class="sourceLineNo">1673</span>  private void procedureFinished(final Procedure proc) {<a name="line.1673"></a>
+<span class="sourceLineNo">1674</span>    // Finalize proc: first release any held lock and run its completion cleanup handler.<a name="line.1674"></a>
+<span class="sourceLineNo">1675</span>    execCompletionCleanup(proc);<a name="line.1675"></a>
+<span class="sourceLineNo">1676</span><a name="line.1676"></a>
+<span class="sourceLineNo">1677</span>    CompletedProcedureRetainer retainer = new CompletedProcedureRetainer(proc);<a name="line.1677"></a>
+<span class="sourceLineNo">1678</span><a name="line.1678"></a>
+<span class="sourceLineNo">1679</span>    // When the procedure does not wait for a client ack, set the retainer's ack time to 0.<a name="line.1679"></a>
+<span class="sourceLineNo">1680</span>    if (!proc.shouldWaitClientAck(getEnvironment())) {<a name="line.1680"></a>
+<span class="sourceLineNo">1681</span>      retainer.setClientAckTime(0);<a name="line.1681"></a>
+<span class="sourceLineNo">1682</span>    }<a name="line.1682"></a>
+<span class="sourceLineNo">1683</span>    // update the executor internal state maps<a name="line.1683"></a>
+<span class="sourceLineNo">1684</span>    completed.put(proc.getProcId(), retainer);<a name="line.1684"></a>
+<span class="sourceLineNo">1685</span>    rollbackStack.remove(proc.getProcId());<a name="line.1685"></a>
+<span class="sourceLineNo">1686</span>    procedures.remove(proc.getProcId());<a name="line.1686"></a>
+<span class="sourceLineNo">1687</span><a name="line.1687"></a>
+<span class="sourceLineNo">1688</span>    // call the scheduler completion cleanup handler<a name="line.1688"></a>
+<span class="sourceLineNo">1689</span>    try {<a name="line.1689"></a>
+<span class="sourceLineNo">1690</span>      scheduler.completionCleanup(proc);<a name="line.1690"></a>
+<span class="sourceLineNo">1691</span>    } catch (Throwable e) {<a name="line.1691"></a>
+<span class="sourceLineNo">1692</span>      // Catch NullPointerExceptions or similar errors; log them and still notify listeners.<a name="line.1692"></a>
+<span class="sourceLineNo">1693</span>      LOG.error("CODE-BUG: uncatched runtime exception for completion cleanup: " + proc, e);<a name="line.1693"></a>
+<span class="sourceLineNo">1694</span>    }<a name="line.1694"></a>
+<span class="sourceLineNo">1695</span><a name="line.1695"></a>
+<span class="sourceLineNo">1696</span>    // Notify the listeners<a name="line.1696"></a>
+<span class="sourceLineNo">1697</span>    sendProcedureFinishedNotification(proc.getProcId());<a name="line.1697"></a>
+<span class="sourceLineNo">1698</span>  }<a name="line.1698"></a>
+<span class="sourceLineNo">1699</span><a name="line.1699"></a>
+<span class="sourceLineNo">1700</span>  RootProcedureState getProcStack(long rootProcId) {<a name="line.1700"></a>
+<span class="sourceLineNo">1701</span>    return rollbackStack.get(rootProcId); // state kept in rollbackStack under the root procedure id<a name="line.1701"></a>
+<span class="sourceLineNo">1702</span>  }<a name="line.1702"></a>
+<span class="sourceLineNo">1703</span><a name="line.1703"></a>
+<span class="sourceLineNo">1704</span>  // ==========================================================================<a name="line.1704"></a>
+<span class="sourceLineNo">1705</span>  //  Worker Thread<a name="line.1705"></a>
+<span class="sourceLineNo">1706</span>  // ==========================================================================<a name="line.1706"></a>
+<span class="sourceLineNo">1707</span>  private class WorkerThread extends StoppableThread {<a name="line.1707"></a>
+<span class="sourceLineNo">1708</span>    private final AtomicLong executionStartTime = new AtomicLong(Long.MAX_VALUE);<a name="line.1708"></a>
+<span class="sourceLineNo">1709</span>    private volatile Procedure&lt;?&gt; activeProcedure;<a name="line.1709"></a>
+<span class="sourceLineNo">1710</span><a name="line.1710"></a>
+<span class="sourceLineNo">1711</span>    public WorkerThread(ThreadGroup group) {<a name="line.1711"></a>
+<span class="sourceLineNo">1712</span>      this(group, "PEWorker-");<a name="line.1712"></a>
+<span class="sourceLineNo">1713</span>    }<a name="line.1713"></a>
+<span class="sourceLineNo">1714</span><a name="line.1714"></a>
+<span class="sourceLineNo">1715</span>    protected WorkerThread(ThreadGroup group, String prefix) {<a name="line.1715"></a>
+<span class="sourceLineNo">1716</span>      super(group, prefix + workerId.incrementAndGet());<a name="line.1716"></a>
+<span class="sourceLineNo">1717</span>      setDaemon(true);<a name="line.1717"></a>
+<span class="sourceLineNo">1718</span>    }<a name="line.1718"></a>
+<span class="sourceLineNo">1719</span><a name="line.1719"></a>
+<span class="sourceLineNo">1720</span>    @Override<a name="line.1720"></a>
+<span class="sourceLineNo">1721</span>    public void sendStopSignal() {<a name="line.1721"></a>
+<span class="sourceLineNo">1722</span>      scheduler.signalAll();<a name="line.1722"></a>
+<span class="sourceLineNo">1723</span>    }<a name="line.1723"></a>
+<span class="sourceLineNo">1724</span><a name="line.1724"></a>
+<span class="sourceLineNo">1725</span>    @Override<a name="line.1725"></a>
+<span class="sourceLineNo">1726</span>    public void run() {<a name="line.1726"></a>
+<span class="sourceLineNo">1727</span>      long lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1727"></a>
+<span class="sourceLineNo">1728</span>      try {<a name="line.1728"></a>
+<span class="sourceLineNo">1729</span>        while (isRunning() &amp;&amp; keepAlive(lastUpdate)) {<a name="line.1729"></a>
+<span class="sourceLineNo">1730</span>          Procedure&lt;?&gt; proc = scheduler.poll(keepAliveTime, TimeUnit.MILLISECONDS);<a name="line.1730"></a>
+<span class="sourceLineNo">1731</span>          if (proc == null) {<a name="line.1731"></a>
+<span class="sourceLineNo">1732</span>            continue;<a name="line.1732"></a>
+<span class="sourceLineNo">1733</span>          }<a name="line.1733"></a>
+<span class="sourceLineNo">1734</span>          this.activeProcedure = proc;<a name="line.1734"></a>
+<span class="sourceLineNo">1735</span>          int activeCount = activeExecutorCount.incrementAndGet();<a name="line.1735"></a>
+<span class="sourceLineNo">1736</span>          int runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1736"></a>
+<span class="sourceLineNo">1737</span>          LOG.trace("Execute pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1737"></a>
+<span class="sourceLineNo">1738</span>            runningCount, activeCount);<a name="line.1738"></a>
+<span class="sourceLineNo">1739</span>          executionStartTime.set(EnvironmentEdgeManager.currentTime());<a name="line.1739"></a>
+<span class="sourceLineNo">1740</span>          try {<a name="line.1740"></a>
+<span class="sourceLineNo">1741</span>            executeProcedure(proc);<a name="line.1741"></a>
+<span class="sourceLineNo">1742</span>          } catch (AssertionError e) {<a name="line.1742"></a>
+<span class="sourceLineNo">1743</span>            LOG.info("ASSERT pid=" + proc.getProcId(), e);<a name="line.1743"></a>
+<span class="sourceLineNo">1744</span>            throw e;<a name="line.1744"></a>
+<span class="sourceLineNo">1745</span>          } finally {<a name="line.1745"></a>
+<span class="sourceLineNo">1746</span>            activeCount = activeExecutorCount.decrementAndGet();<a name="line.1746"></a>
+<span class="sourceLineNo">1747</span>            runningCount = store.setRunningProcedureCount(activeCount);<a name="line.1747"></a>
+<span class="sourceLineNo">1748</span>            LOG.trace("Halt pid={} runningCount={}, activeCount={}", proc.getProcId(),<a name="line.1748"></a>
+<span class="sourceLineNo">1749</span>              runningCount, activeCount);<a name="line.1749"></a>
+<span class="sourceLineNo">1750</span>            this.activeProcedure = null;<a name="line.1750"></a>
+<span class="sourceLineNo">1751</span>            lastUpdate = EnvironmentEdgeManager.currentTime();<a name="line.1751"></a>
+<span class="sourceLineNo">1752</span>            executionStartTime.set(Long.MAX_VALUE);<a name="line.1752"></a>
+<span class="sourceLineNo">1753</span>          }<a name="line.1753"></a>
+<span class="sourceLineNo">1754</span>        }<a name="line.1754"></a>
+<span class="sourceLineNo">1755</span>      } catch (Throwable t) {<a name="line.1755"></a>
+<span class="sourceLineNo">1756</span>        LOG.warn("Worker terminating UNNATURALLY {}", this.activeProcedure, t);<a name="line.1756"></a>
+<span class="sourceLineNo">1757</span>      } finally {<a name="line.1757"></a>
+<span class="sourceLineNo">1758</span>        LOG.trace("Worker terminated.");<a name="line.1758"></a>
+<span class="sourceLineNo">1759</span>      }<a name="line.1759"></a>
+<span class="sourceLineNo">1760</span>      workerThreads.remove(this);<a name="line.1760"></a>
+<span class="sourceLineNo">1761</span>    }<a name="line.1761"></a>
+<span class="sourceLineNo">1762</span><a name="line.1762"></a>
+<span class="sourceLineNo">1763</span>    @Override<a name="line.1763"></a>
+<span class="sourceLineNo">1764</span>    public String toString() {<a name="line.1764"></a>
+<span class="sourceLineNo">1765</span>      Procedure&lt;?&gt; p = this.activeProcedure;<a name="line.1765"></a>
+<span class="sourceLineNo">1766</span>      return getName() + "(pid=" + (p == null? Procedure.NO_PROC_ID: p.getProcId() + ")");<a name="line.1766"></a>
+<span class="sourceLineNo">1767</span>    }<a name="line.1767"></a>
+<span class="sourceLineNo">1768</span><a name="line.1768"></a>
+<span class="sourceLineNo">1769</span>    /**<a name="line.1769"></a>
+<span class="sourceLineNo">1770</span>     * @return the time since the current procedure is running<a name="line.1770"></a>
+<span class="sourceLineNo">1771</span>     */<a name="line.1771"></a>
+<span class="sourceLineNo">1772</span>    public long getCurrentRunTime() {<a name="line.1772"></a>
+<span class="sourceLineNo">1773</span>      return EnvironmentEdgeManager.currentTime() - executionStartTime.get();<a name="line.1773"></a>
+<span class="sourceLineNo">1774</span>    }<a name="line.1774"></a>
+<span class="sourceLineNo">1775</span><a name="line.1775"></a>
+<span class="sourceLineNo">1776</span>    // core worker never timeout<a name="line.1776"></a>
+<span class="sourceLineNo">1777</span>    protected boolean keepAlive(long lastUpdate) {<a name="line.1777"></a>
+<span class="sourceLineNo">1778</span>      return true;<a name="line.1778"></a>
+<span class="sourceLineNo">1779</span>    }<a name="line.1779"></a>
+<span class="sourceLineNo">1780</span>  }<a name="line.1780"></a>
+<span class="sourceLineNo">1781</span><a name="line.1781"></a>
+<span class="sourceLineNo">1782</span>  // A worker thread which can be added when core workers are stuck. Will time out after<a name="line.1782"></a>
+<span class="sourceLineNo">1783</span>  // keepAliveTime if there is no procedure to run.<a name="line.1783"></a>
+<span class="sourceLineNo">1784</span>  private final class KeepAliveWorkerThread extends WorkerThread {<a name="line.1784"></a>
+<span class="sourceLineNo">1785</span>    public KeepAliveWorkerThread(ThreadGroup group) {<a name="line.1785"></a>
+<span class="sourceLineNo">1786</span>      super(group, "KeepAlivePEWorker-");<a name="line.1786"></a>
+<span class="sourceLineNo">1787</span>    }<a name="line.1787"></a>
+<span class="sourceLineNo">1788</span><a name="line.1788"></a>
+<span class="sourceLineNo">1789</span>    @Override<a name="line.1789"></a>
+<span class="sourceLineNo">1790</span>    protected boolean keepAlive(long lastUpdate) {<a name="line.1790"></a>
+<span class="sourceLineNo">1791</span>      final long sinceLastUpdate = EnvironmentEdgeManager.currentTime() - lastUpdate;<a name="line.1791"></a>
+<span class="sourceLineNo">1792</span>      return sinceLastUpdate &lt; keepAliveTime; // give up once keepAliveTime has elapsed<a name="line.1792"></a>
+<span class="sourceLineNo">1793</span>    }<a name="line.1793"></a>
+<span class="sourceLineNo">1794</span>  }<a name="line.1794"></a>
+<span class="sourceLineNo">1795</span><a name="line.1795"></a>
+<span class="sourceLineNo">1796</span>  // ----------------------------------------------------------------------------<a name="line.1796"></a>
+<span class="sourceLineNo">1797</span>  // TODO-MAYBE: Should we provide a InlineChore to notify the store with the<a name="line.1797"></a>
+<span class="sourceLineNo">1798</span>  // full set of procedures pending and completed to write a compacted<a name="line.1798"></a>
+<span class="sourceLineNo">1799</span>  // version of the log (in case it is a log)?<a name="line.1799"></a>
+<span class="sourceLineNo">1800</span>  // In theory no; procedures have a short life, so at some point the store<a name="line.1800"></a>
+<span class="sourceLineNo">1801</span>  // will have the tracker saying everything is in the last log.<a name="line.1801"></a>
+<span class="sourceLineNo">1802</span>  // ----------------------------------------------------------------------------<a name="line.1802"></a>
+<span class="sourceLineNo">1803</span><a name="line.1803"></a>
+<span class="sourceLineNo">1804</span>  private final class WorkerMonitor extends InlineChore {<a name="line.1804"></a>
+<span class="sourceLineNo">1805</span>    public static final String WORKER_MONITOR_INTERVAL_CONF_KEY =<a name="line.1805"></a>
+<span class="sourceLineNo">1806</span>        "hbase.procedure.worker.monitor.interval.msec";<a name="line.1806"></a>
+<span class="sourceLineNo">1807</span>    private static final int DEFAULT_WORKER_MONITOR_INTERVAL = 5000; // 5sec<a name="line.1807"></a>
+<span class="sourceLineNo">1808</span><a name="line.1808"></a>
+<span class="sourceLineNo">1809</span>    public static final String WORKER_STUCK_THRESHOLD_CONF_KEY =<a name="line.1809"></a>
+<span class="sourceLineNo">1810</span>        "hbase.procedure.worker.stuck.threshold.msec";<a name="line.1810"></a>
+<span class="sourceLineNo">1811</span>    private static final int DEFAULT_WORKER_STUCK_THRESHOLD = 10000; // 10sec<a name="line.1811"></a>
+<span class="sourceLineNo">1812</span><a name="line.1812"></a>
+<span class="sourceLineNo">1813</span>    public static final String WORKER_ADD_STUCK_PERCENTAGE_CONF_KEY =<a name="line.1813"></a>
+<span class="sourceLineNo">1814</span>        "hbase.procedure.worker.add.stuck.percentage";<a name="line.1814"></a>
+<span class="sourceLineNo">1815</span>    private static final float DEFAULT_WORKER_ADD_STUCK_PERCENTAGE = 0.5f; // 50% stuck<a name="line.1815"></a>
+<span class="sourceLineNo">1816</span><a name="line.1816"></a>
+<span class="sourceLineNo">1817</span>    private float addWorkerStuckPercentage = DEFAULT_WORKER_ADD_STUCK_PERCENTAGE;<a name="line.1817"></a>
+<span class="sourceLineNo">1818</span>    private int timeoutInterval = DEFAULT_WORKER_MONITOR_INTERVAL;<a name="line.1818"></a>
+<span class="sourceLineNo">1819</span>    private int stuckThreshold = DEFAULT_WORKER_STUCK_THRESHOLD;<a name="line.1819"></a>
+<span class="sourceLineNo">1820</span><a name="line.1820"></a>
+<span class="sourceLineNo">1821</span>    public WorkerMonitor() {<a name="line.1821"></a>
+<span class="sourceLineNo">1822</span>      refreshConfig();<a name="line.1822"></a>
+<span class="sourceLineNo">1823</span>    }<a name="line.1823"></a>
+<span class="sourceLineNo">1824</span><a name="line.1824"></a>
+<span class="sourceLineNo">1825</span>    @Override<a name="line.1825"></a>
+<span class="sourceLineNo">1826</span>    public void run() {<a name="line.1826"></a>
+<span class="sourceLineNo">1827</span>      final int stuckCount = checkForStuckWorkers();<a name="line.1827"></a>
+<span class="sourceLineNo">1828</span>      checkThreadCount(stuckCount);<a name="line.1828"></a>
+<span class="sourceLineNo">1829</span><a name="line.1829"></a>
+<span class="sourceLineNo">1830</span>      // refresh interval (poor man dynamic conf update)<a name="line.1830"></a>
+<span class="sourceLineNo">1831</span>      refreshConfig();<a name="line.1831"></a>
+<span class="sourceLineNo">1832</span>    }<a name="line.1832"></a>
+<span class="sourceLineNo">1833</span><a name="line.1833"></a>
+<span class="sourceLineNo">1834</span>    private int checkForStuckWorkers() {<a name="line.1834"></a>
+<span class="sourceLineNo">1835</span>      // check if any of the worker is stuck<a name="line.1835"></a>
+<span class="sourceLineNo">1836</span>      int stuckCount = 0;<a name="line.1836"></a>
+<span class="sourceLineNo">1837</span>      for (WorkerThread worker : workerThreads) {<a name="line.1837"></a>
+<span class="sourceLineNo">1838</span>        if (worker.getCurrentRunTime() &lt; stuckThreshold) {<a name="line.1838"></a>
+<span class="sourceLineNo">1839</span>          continue;<a name="line.1839"></a>
+<span class="sourceLineNo">1840</span>        }<a name="line.1840"></a>
+<span class="sourceLineNo">1841</span><a name="line.1841"></a>
+<span class="sourceLineNo">1842</span>        // WARN the worker is stuck<a name="line.1842"></a>
+<span class="sourceLineNo">1843</span>        

<TRUNCATED>