You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/18 08:34:37 UTC

[incubator-doris] 02/09: [fix] fix bug that replica can not be repaired duo to DECOMMISSION state (#9424)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit b20df704b18b9b6741401e9211d180d9b19c8532
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Tue May 17 22:36:30 2022 +0800

    [fix] fix bug that replica can not be repaired duo to DECOMMISSION state (#9424)
    
    Reset state of replica which state are in DECOMMISSION after finished scheduling.
---
 .../org/apache/doris/clone/TabletSchedCtx.java     | 24 ++++++++++++++
 .../org/apache/doris/clone/TabletScheduler.java    | 38 +++++++++++++---------
 2 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 6610b484ae..ac7a96efc2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -1098,6 +1098,8 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
             sb.append(". to backend: ").append(destBackendId);
             sb.append(", dest path hash: ").append(destPathHash);
         }
+        sb.append(", visible version: ").append(visibleVersion);
+        sb.append(", committed version: ").append(committedVersion);
         if (errMsg != null) {
             sb.append(". err: ").append(errMsg);
         }
@@ -1119,4 +1121,26 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
             }
         }
     }
+
+    /**
+     * call this when releaseTabletCtx()
+     */
+    public void resetReplicaState() {
+        if (tablet != null) {
+            for (Replica replica : tablet.getReplicas()) {
+                // To address issue: https://github.com/apache/incubator-doris/issues/9422
+                // the DECOMMISSION state is set in TabletScheduler and not persist to meta.
+                // So it is reasonable to reset this state if we failed to scheduler this tablet.
+                // That is, if the TabletScheduler cannot process the tablet, then it should reset
+                // any intermediate state it set during the scheduling process.
+                if (replica.getState() == ReplicaState.DECOMMISSION) {
+                    replica.setState(ReplicaState.NORMAL);
+                    replica.setWatermarkTxnId(-1);
+                    LOG.debug("reset replica {} on backend {} of tablet {} state from DECOMMISSION to NORMAL",
+                            replica.getId(), replica.getBackendId(), tabletId);
+                }
+            }
+        }
+    }
+
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 1f80cada9e..8231269816 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -385,12 +385,12 @@ public class TabletScheduler extends MasterDaemon {
                     if (tabletCtx.getType() == Type.BALANCE) {
                         // if balance is disabled, remove this tablet
                         if (Config.disable_balance) {
-                            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED,
+                            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(),
                                     "disable balance and " + e.getMessage());
                         } else {
                             // remove the balance task if it fails to be scheduled many times
                             if (tabletCtx.getFailedSchedCounter() > 10) {
-                                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED,
+                                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(),
                                         "schedule failed too many times and " + e.getMessage());
                             } else {
                                 // we must release resource it current hold, and be scheduled again
@@ -410,19 +410,19 @@ public class TabletScheduler extends MasterDaemon {
                 } else if (e.getStatus() == Status.FINISHED) {
                     // schedule redundant tablet or scheduler disabled will throw this exception
                     stat.counterTabletScheduledSucceeded.incrementAndGet();
-                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getMessage());
+                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getStatus(), e.getMessage());
                 } else {
                     Preconditions.checkState(e.getStatus() == Status.UNRECOVERABLE, e.getStatus());
                     // discard
                     stat.counterTabletScheduledDiscard.incrementAndGet();
-                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
                 }
                 continue;
             } catch (Exception e) {
                 LOG.warn("got unexpected exception, discard this schedule. tablet: {}",
                         tabletCtx.getTabletId(), e);
                 stat.counterTabletScheduledFailed.incrementAndGet();
-                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage());
+                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage());
                 continue;
             }
 
@@ -532,7 +532,8 @@ public class TabletScheduler extends MasterDaemon {
                     for (TransactionState transactionState : dbTransactionMgr.getPreCommittedTxnList()) {
                         if(transactionState.getTableIdList().contains(tbl.getId())) {
                             // If table releate to transaction with precommitted status, do not allow to do balance.
-                            throw new SchedException(Status.UNRECOVERABLE, "There exists PRECOMMITTED transaction releated to table");
+                            throw new SchedException(Status.UNRECOVERABLE,
+                                    "There exists PRECOMMITTED transaction related to table");
                         }
                     }
                 } catch (AnalysisException e) {
@@ -1053,7 +1054,6 @@ public class TabletScheduler extends MasterDaemon {
     }
 
     private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, String reason, boolean force) throws SchedException {
-
         /*
          * Before deleting a replica, we should make sure that there is no running txn on it and no more txns will be on it.
          * So we do followings:
@@ -1069,6 +1069,8 @@ public class TabletScheduler extends MasterDaemon {
             replica.setState(ReplicaState.DECOMMISSION);
             // set priority to normal because it may wait for a long time. Remain it as VERY_HIGH may block other task.
             tabletCtx.setOrigPriority(Priority.NORMAL);
+            LOG.debug("set replica {} on backend {} of tablet {} state to DECOMMISSION",
+                    replica.getId(), replica.getBackendId(), tabletCtx.getTabletId());
             throw new SchedException(Status.SCHEDULE_FAILED, "set watermark txn " + nextTxnId);
         } else if (replica.getState() == ReplicaState.DECOMMISSION && replica.getWatermarkTxnId() != -1) {
             long watermarkTxnId = replica.getWatermarkTxnId();
@@ -1323,17 +1325,20 @@ public class TabletScheduler extends MasterDaemon {
         addTablet(tabletCtx, true /* force */);
     }
 
-    private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, String reason) {
+    private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, Status status, String reason) {
         // use 2 steps to avoid nested database lock and synchronized.(releaseTabletCtx() may hold db lock)
         // remove the tablet ctx, so that no other process can see it
         removeTabletCtx(tabletCtx, reason);
         // release resources taken by tablet ctx
-        releaseTabletCtx(tabletCtx, state);
+        releaseTabletCtx(tabletCtx, state, status == Status.UNRECOVERABLE);
     }
 
-    private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state) {
+    private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, boolean resetReplicaState) {
         tabletCtx.setState(state);
         tabletCtx.releaseResource(this);
+        if (resetReplicaState) {
+            tabletCtx.resetReplicaState();
+        }
         tabletCtx.setFinishedTime(System.currentTimeMillis());
     }
 
@@ -1393,25 +1398,25 @@ public class TabletScheduler extends MasterDaemon {
             } else if (e.getStatus() == Status.UNRECOVERABLE) {
                 // unrecoverable
                 stat.counterTabletScheduledDiscard.incrementAndGet();
-                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
                 return true;
             } else if (e.getStatus() == Status.FINISHED) {
                 // tablet is already healthy, just remove
-                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage());
+                finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage());
                 return true;
             }
         } catch (Exception e) {
             LOG.warn("got unexpected exception when finish clone task. tablet: {}",
                     tabletCtx.getTabletId(), e);
             stat.counterTabletScheduledDiscard.incrementAndGet();
-            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage());
+            finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage());
             return true;
         }
 
         Preconditions.checkState(tabletCtx.getState() == TabletSchedCtx.State.FINISHED);
         stat.counterCloneTaskSucceeded.incrementAndGet();
         gatherStatistics(tabletCtx);
-        finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, "finished");
+        finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished");
         return true;
     }
 
@@ -1475,7 +1480,10 @@ public class TabletScheduler extends MasterDaemon {
 
         // 2. release ctx
         timeoutTablets.stream().forEach(t -> {
-            releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED);
+            // Set "resetReplicaState" to true because
+            // the timeout task should also be considered as UNRECOVERABLE,
+            // so need to reset replica state.
+            releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED, true);
             stat.counterCloneTaskTimeout.incrementAndGet();
         });
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org