You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/06/08 09:46:31 UTC

[doris] 01/02: [minor](clone) add more debug log for tablet scheduler (#19892)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 96b007995fc8f3425e7383cc1e4780ad3dadeafa
Author: Mingyu Chen <mo...@163.com>
AuthorDate: Sat May 20 15:59:26 2023 +0800

    [minor](clone) add more debug log for tablet scheduler (#19892)
    
    Sometimes the tablet scheduler cannot schedule a tablet, and there is no further information available for debugging.
    So I added some debug logs for this process.
    No logic is changed.
---
 .../apache/doris/clone/BackendLoadStatistic.java   |  2 ++
 .../org/apache/doris/clone/TabletSchedCtx.java     | 35 ++++++++++++++++++--
 .../org/apache/doris/clone/TabletScheduler.java    | 38 +++++++++++++++++++---
 3 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
index 47befaaccb..d040b8053c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
@@ -303,6 +303,8 @@ public class BackendLoadStatistic {
             RootPathLoadStatistic pathStatistic = pathStatistics.get(i);
             // if this is a supplement task, ignore the storage medium
             if (!isSupplement && pathStatistic.getStorageMedium() != medium) {
+                LOG.debug("backend {} path {}'s storage medium {} is not {} storage medium, actual: {}",
+                        beId, pathStatistic.getPath(), pathStatistic.getStorageMedium(), medium);
                 continue;
             }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 990153a00a..b889a70430 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -488,6 +488,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         if (backend == null) {
             // containsBE() is currently only used for choosing dest backend to do clone task.
             // return true so that it won't choose this backend.
+            LOG.debug("desc backend {} does not exist, skip. tablet: {}", beId, tabletId);
             return true;
         }
         String host = backend.getHost();
@@ -495,13 +496,18 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null) {
                 // BE has been dropped, skip it
+                LOG.debug("replica's backend {} does not exist, skip. tablet: {}", replica.getBackendId(), tabletId);
                 continue;
             }
             if (!Config.allow_replica_on_same_host && !FeConstants.runningUnitTest && host.equals(be.getHost())) {
+                LOG.debug("replica's backend {} is on same host {}, skip. tablet: {}",
+                        replica.getBackendId(), host, tabletId);
                 return true;
             }
 
             if (replica.getBackendId() == beId) {
+                LOG.debug("replica's backend {} is same as dest backend {}, skip. tablet: {}",
+                        replica.getBackendId(), beId, tabletId);
                 return true;
             }
         }
@@ -557,24 +563,34 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         List<Replica> candidates = Lists.newArrayList();
         for (Replica replica : tablet.getReplicas()) {
             if (exceptBeId != -1 && replica.getBackendId() == exceptBeId) {
+                LOG.debug("replica's backend {} is same as except backend {}, skip. tablet: {}",
+                        replica.getBackendId(), exceptBeId, tabletId);
                 continue;
             }
 
             if (replica.isBad() || replica.tooSlow()) {
+                LOG.debug("replica {} is bad({}) or too slow({}), skip. tablet: {}",
+                        replica.getId(), replica.isBad(), replica.tooSlow(), tabletId);
                 continue;
             }
 
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null || !be.isAlive()) {
                 // backend which is in decommission can still be the source backend
+                LOG.debug("replica's backend {} does not exist or is not alive, skip. tablet: {}",
+                        replica.getBackendId(), tabletId);
                 continue;
             }
 
             if (replica.getLastFailedVersion() > 0) {
+                LOG.debug("replica {} has failed version {}, skip. tablet: {}",
+                        replica.getId(), replica.getLastFailedVersion(), tabletId);
                 continue;
             }
 
             if (!replica.checkVersionCatchUp(visibleVersion, false)) {
+                LOG.debug("replica {} version {} has not catch up to visible version {}, skip. tablet: {}",
+                        replica.getId(), replica.getVersion(), visibleVersion, tabletId);
                 continue;
             }
 
@@ -591,14 +607,19 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         for (Replica srcReplica : candidates) {
             PathSlot slot = backendsWorkingSlots.get(srcReplica.getBackendId());
             if (slot == null) {
+                LOG.debug("replica's backend {} does not have working slot, skip. tablet: {}",
+                        srcReplica.getBackendId(), tabletId);
                 continue;
             }
 
             long srcPathHash = slot.takeSlot(srcReplica.getPathHash());
-            if (srcPathHash != -1) {
-                setSrc(srcReplica);
-                return;
+            if (srcPathHash == -1) {
+                LOG.debug("replica's backend {} does not have available slot, skip. tablet: {}",
+                        srcReplica.getBackendId(), tabletId);
+                continue;
             }
+            setSrc(srcReplica);
+            return;
         }
         throw new SchedException(Status.SCHEDULE_FAILED, "unable to find source slot");
     }
@@ -629,11 +650,15 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         Replica chosenReplica = null;
         for (Replica replica : tablet.getReplicas()) {
             if (replica.isBad()) {
+                LOG.debug("replica {} is bad, skip. tablet: {}",
+                        replica.getId(), tabletId);
                 continue;
             }
 
             Backend be = infoService.getBackend(replica.getBackendId());
             if (be == null || !be.isScheduleAvailable()) {
+                LOG.debug("replica's backend {} does not exist or is not scheduler available, skip. tablet: {}",
+                        replica.getBackendId(), tabletId);
                 continue;
             }
 
@@ -644,10 +669,14 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
                     && ((replica.getVersion() == visibleVersion)
                     || replica.getVersion() > visibleVersion) && replica.getState() != ReplicaState.DECOMMISSION) {
                 // skip healthy replica
+                LOG.debug("replica {} version {} is healthy, visible version {}, replica state {}, skip. tablet: {}",
+                        replica.getId(), replica.getVersion(), visibleVersion, replica.getState(), tabletId);
                 continue;
             }
 
             if (replica.needFurtherRepair()) {
+                LOG.debug("replica {} need further repair, choose it. tablet: {}",
+                        replica.getId(), tabletId);
                 chosenReplica = replica;
                 break;
             }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index b243980362..c672976b92 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1308,11 +1308,15 @@ public class TabletScheduler extends MasterDaemon {
         List<RootPathLoadStatistic> allFitPaths = Lists.newArrayList();
         for (BackendLoadStatistic bes : beStatistics) {
             if (!bes.isAvailable()) {
+                LOG.debug("backend {} is not available, skip. tablet: {}", bes.getBeId(), tabletCtx.getTabletId());
                 continue;
             }
 
             // exclude BE which already has replica of this tablet or another BE at same host has this replica
             if (tabletCtx.filterDestBE(bes.getBeId())) {
+                LOG.debug("backend {} already has replica of this tablet or another BE "
+                                + "at same host has this replica, skip. tablet: {}",
+                        bes.getBeId(), tabletCtx.getTabletId());
                 continue;
             }
 
@@ -1320,9 +1324,13 @@ public class TabletScheduler extends MasterDaemon {
             // Else, check the tag.
             if (forColocate) {
                 if (!tabletCtx.getColocateBackendsSet().contains(bes.getBeId())) {
+                    LOG.debug("backend {} is not in colocate backend set, skip. tablet: {}",
+                            bes.getBeId(), tabletCtx.getTabletId());
                     continue;
                 }
             } else if (!bes.getTag().equals(tag)) {
+                LOG.debug("backend {}'s tag {} is not equal to tablet's tag {}, skip. tablet: {}",
+                        bes.getBeId(), bes.getTag(), tag, tabletCtx.getTabletId());
                 continue;
             }
 
@@ -1331,6 +1339,7 @@ public class TabletScheduler extends MasterDaemon {
                     resultPaths, tabletCtx.getTabletStatus() != TabletStatus.REPLICA_RELOCATING
                     /* if REPLICA_RELOCATING, then it is not a supplement task */);
             if (!st.ok()) {
+                LOG.debug("unable to find path for tablet: {}. {}", tabletCtx, st);
                 // This is to solve, when we decommission some BEs with SSD disks,
                 // if there are no SSD disks on the remaining BEs, it will be impossible to select a
                 // suitable destination path.
@@ -1358,31 +1367,50 @@ public class TabletScheduler extends MasterDaemon {
         // we try to find a path with specified media type, if not find, arbitrarily use one.
         for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
             if (rootPathLoadStatistic.getStorageMedium() != tabletCtx.getStorageMedium()) {
+                LOG.debug("backend {}'s path {}'s storage medium {} "
+                                + "is not equal to tablet's storage medium {}, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        rootPathLoadStatistic.getStorageMedium(), tabletCtx.getStorageMedium(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             PathSlot slot = backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
             if (slot == null) {
+                LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
-            if (pathHash != -1) {
-                return rootPathLoadStatistic;
+            if (pathHash == -1) {
+                LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
+                continue;
             }
+            return rootPathLoadStatistic;
         }
 
         // no root path with specified media type is found, get arbitrary one.
         for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
             PathSlot slot = backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
             if (slot == null) {
+                LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
                 continue;
             }
 
             long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
-            if (pathHash != -1) {
-                return rootPathLoadStatistic;
+            if (pathHash == -1) {
+                LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: {}",
+                        rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+                        tabletCtx.getTabletId());
+                continue;
             }
+            return rootPathLoadStatistic;
         }
 
         throw new SchedException(Status.SCHEDULE_FAILED, "unable to find dest path which can be fit in");
@@ -1709,10 +1737,12 @@ public class TabletScheduler extends MasterDaemon {
 
             Slot slot = pathSlots.get(pathHash);
             if (slot == null) {
+                LOG.debug("path {} is not exist", pathHash);
                 return -1;
             }
             slot.rectify();
             if (slot.available <= 0) {
+                LOG.debug("path {} has no available slot", pathHash);
                 return -1;
             }
             slot.available--;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org