You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/18 08:34:37 UTC
[incubator-doris] 02/09: [fix] fix bug that replica can not be repaired due to DECOMMISSION state (#9424)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit b20df704b18b9b6741401e9211d180d9b19c8532
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Tue May 17 22:36:30 2022 +0800
[fix] fix bug that replica can not be repaired due to DECOMMISSION state (#9424)
Reset the state of replicas that are in the DECOMMISSION state after scheduling has finished.
---
.../org/apache/doris/clone/TabletSchedCtx.java | 24 ++++++++++++++
.../org/apache/doris/clone/TabletScheduler.java | 38 +++++++++++++---------
2 files changed, 47 insertions(+), 15 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 6610b484ae..ac7a96efc2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -1098,6 +1098,8 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
sb.append(". to backend: ").append(destBackendId);
sb.append(", dest path hash: ").append(destPathHash);
}
+ sb.append(", visible version: ").append(visibleVersion);
+ sb.append(", committed version: ").append(committedVersion);
if (errMsg != null) {
sb.append(". err: ").append(errMsg);
}
@@ -1119,4 +1121,26 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
}
}
}
+
+ /**
+ * Reset DECOMMISSION replicas of this tablet to NORMAL; releaseTabletCtx() calls this when asked to reset replica state.
+ */
+ public void resetReplicaState() {
+ if (tablet != null) {
+ for (Replica replica : tablet.getReplicas()) {
+ // To address issue: https://github.com/apache/incubator-doris/issues/9422
+ // The DECOMMISSION state is set in TabletScheduler and is not persisted to meta.
+ // So it is reasonable to reset this state if we failed to schedule this tablet.
+ // That is, if the TabletScheduler cannot process the tablet, then it should reset
+ // any intermediate state it set during the scheduling process.
+ if (replica.getState() == ReplicaState.DECOMMISSION) {
+ replica.setState(ReplicaState.NORMAL);
+ replica.setWatermarkTxnId(-1);
+ LOG.debug("reset replica {} on backend {} of tablet {} state from DECOMMISSION to NORMAL",
+ replica.getId(), replica.getBackendId(), tabletId);
+ }
+ }
+ }
+ }
+
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 1f80cada9e..8231269816 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -385,12 +385,12 @@ public class TabletScheduler extends MasterDaemon {
if (tabletCtx.getType() == Type.BALANCE) {
// if balance is disabled, remove this tablet
if (Config.disable_balance) {
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED,
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(),
"disable balance and " + e.getMessage());
} else {
// remove the balance task if it fails to be scheduled many times
if (tabletCtx.getFailedSchedCounter() > 10) {
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED,
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(),
"schedule failed too many times and " + e.getMessage());
} else {
// we must release resource it current hold, and be scheduled again
@@ -410,19 +410,19 @@ public class TabletScheduler extends MasterDaemon {
} else if (e.getStatus() == Status.FINISHED) {
// schedule redundant tablet or scheduler disabled will throw this exception
stat.counterTabletScheduledSucceeded.incrementAndGet();
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getMessage());
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getStatus(), e.getMessage());
} else {
Preconditions.checkState(e.getStatus() == Status.UNRECOVERABLE, e.getStatus());
// discard
stat.counterTabletScheduledDiscard.incrementAndGet();
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
}
continue;
} catch (Exception e) {
LOG.warn("got unexpected exception, discard this schedule. tablet: {}",
tabletCtx.getTabletId(), e);
stat.counterTabletScheduledFailed.incrementAndGet();
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage());
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage());
continue;
}
@@ -532,7 +532,8 @@ public class TabletScheduler extends MasterDaemon {
for (TransactionState transactionState : dbTransactionMgr.getPreCommittedTxnList()) {
if(transactionState.getTableIdList().contains(tbl.getId())) {
// If table releate to transaction with precommitted status, do not allow to do balance.
- throw new SchedException(Status.UNRECOVERABLE, "There exists PRECOMMITTED transaction releated to table");
+ throw new SchedException(Status.UNRECOVERABLE,
+ "There exists PRECOMMITTED transaction related to table");
}
}
} catch (AnalysisException e) {
@@ -1053,7 +1054,6 @@ public class TabletScheduler extends MasterDaemon {
}
private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, String reason, boolean force) throws SchedException {
-
/*
* Before deleting a replica, we should make sure that there is no running txn on it and no more txns will be on it.
* So we do followings:
@@ -1069,6 +1069,8 @@ public class TabletScheduler extends MasterDaemon {
replica.setState(ReplicaState.DECOMMISSION);
// set priority to normal because it may wait for a long time. Remain it as VERY_HIGH may block other task.
tabletCtx.setOrigPriority(Priority.NORMAL);
+ LOG.debug("set replica {} on backend {} of tablet {} state to DECOMMISSION",
+ replica.getId(), replica.getBackendId(), tabletCtx.getTabletId());
throw new SchedException(Status.SCHEDULE_FAILED, "set watermark txn " + nextTxnId);
} else if (replica.getState() == ReplicaState.DECOMMISSION && replica.getWatermarkTxnId() != -1) {
long watermarkTxnId = replica.getWatermarkTxnId();
@@ -1323,17 +1325,20 @@ public class TabletScheduler extends MasterDaemon {
addTablet(tabletCtx, true /* force */);
}
- private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, String reason) {
+ private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, Status status, String reason) {
// use 2 steps to avoid nested database lock and synchronized.(releaseTabletCtx() may hold db lock)
// remove the tablet ctx, so that no other process can see it
removeTabletCtx(tabletCtx, reason);
// release resources taken by tablet ctx
- releaseTabletCtx(tabletCtx, state);
+ releaseTabletCtx(tabletCtx, state, status == Status.UNRECOVERABLE);
}
- private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state) {
+ private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, boolean resetReplicaState) {
tabletCtx.setState(state);
tabletCtx.releaseResource(this);
+ if (resetReplicaState) {
+ tabletCtx.resetReplicaState();
+ }
tabletCtx.setFinishedTime(System.currentTimeMillis());
}
@@ -1393,25 +1398,25 @@ public class TabletScheduler extends MasterDaemon {
} else if (e.getStatus() == Status.UNRECOVERABLE) {
// unrecoverable
stat.counterTabletScheduledDiscard.incrementAndGet();
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
return true;
} else if (e.getStatus() == Status.FINISHED) {
// tablet is already healthy, just remove
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
return true;
}
} catch (Exception e) {
LOG.warn("got unexpected exception when finish clone task. tablet: {}",
tabletCtx.getTabletId(), e);
stat.counterTabletScheduledDiscard.incrementAndGet();
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage());
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage());
return true;
}
Preconditions.checkState(tabletCtx.getState() == TabletSchedCtx.State.FINISHED);
stat.counterCloneTaskSucceeded.incrementAndGet();
gatherStatistics(tabletCtx);
- finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, "finished");
+ finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished");
return true;
}
@@ -1475,7 +1480,10 @@ public class TabletScheduler extends MasterDaemon {
// 2. release ctx
timeoutTablets.stream().forEach(t -> {
- releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED);
+ // Set "resetReplicaState" to true because
+ // the timeout task should also be considered as UNRECOVERABLE,
+ // so need to reset replica state.
+ releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED, true);
stat.counterCloneTaskTimeout.incrementAndGet();
});
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org