You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/06/08 09:46:31 UTC
[doris] 01/02: [minor](clone) add more debug log for tablet scheduler (#19892)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 96b007995fc8f3425e7383cc1e4780ad3dadeafa
Author: Mingyu Chen <mo...@163.com>
AuthorDate: Sat May 20 15:59:26 2023 +0800
[minor](clone) add more debug log for tablet scheduler (#19892)
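Sometimes the tablet scheduler cannot schedule a tablet, and there is no further information available for debugging.
This commit adds debug logs to the scheduling process so that the reason a backend, replica, or path was skipped is visible.
No logic is changed; the slot checks are only restructured into an early-continue form so that each skip can be logged.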
---
.../apache/doris/clone/BackendLoadStatistic.java | 2 ++
.../org/apache/doris/clone/TabletSchedCtx.java | 35 ++++++++++++++++++--
.../org/apache/doris/clone/TabletScheduler.java | 38 +++++++++++++++++++---
3 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
index 47befaaccb..d040b8053c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BackendLoadStatistic.java
@@ -303,6 +303,8 @@ public class BackendLoadStatistic {
RootPathLoadStatistic pathStatistic = pathStatistics.get(i);
// if this is a supplement task, ignore the storage medium
if (!isSupplement && pathStatistic.getStorageMedium() != medium) {
+ LOG.debug("backend {} path {}'s storage medium {} is not {} storage medium, actual: {}",
+ beId, pathStatistic.getPath(), pathStatistic.getStorageMedium(), medium);
continue;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 990153a00a..b889a70430 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -488,6 +488,7 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
if (backend == null) {
// containsBE() is currently only used for choosing dest backend to do clone task.
// return true so that it won't choose this backend.
+ LOG.debug("desc backend {} does not exist, skip. tablet: {}", beId, tabletId);
return true;
}
String host = backend.getHost();
@@ -495,13 +496,18 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null) {
// BE has been dropped, skip it
+ LOG.debug("replica's backend {} does not exist, skip. tablet: {}", replica.getBackendId(), tabletId);
continue;
}
if (!Config.allow_replica_on_same_host && !FeConstants.runningUnitTest && host.equals(be.getHost())) {
+ LOG.debug("replica's backend {} is on same host {}, skip. tablet: {}",
+ replica.getBackendId(), host, tabletId);
return true;
}
if (replica.getBackendId() == beId) {
+ LOG.debug("replica's backend {} is same as dest backend {}, skip. tablet: {}",
+ replica.getBackendId(), beId, tabletId);
return true;
}
}
@@ -557,24 +563,34 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
List<Replica> candidates = Lists.newArrayList();
for (Replica replica : tablet.getReplicas()) {
if (exceptBeId != -1 && replica.getBackendId() == exceptBeId) {
+ LOG.debug("replica's backend {} is same as except backend {}, skip. tablet: {}",
+ replica.getBackendId(), exceptBeId, tabletId);
continue;
}
if (replica.isBad() || replica.tooSlow()) {
+ LOG.debug("replica {} is bad({}) or too slow({}), skip. tablet: {}",
+ replica.getId(), replica.isBad(), replica.tooSlow(), tabletId);
continue;
}
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null || !be.isAlive()) {
// backend which is in decommission can still be the source backend
+ LOG.debug("replica's backend {} does not exist or is not alive, skip. tablet: {}",
+ replica.getBackendId(), tabletId);
continue;
}
if (replica.getLastFailedVersion() > 0) {
+ LOG.debug("replica {} has failed version {}, skip. tablet: {}",
+ replica.getId(), replica.getLastFailedVersion(), tabletId);
continue;
}
if (!replica.checkVersionCatchUp(visibleVersion, false)) {
+ LOG.debug("replica {} version {} has not catch up to visible version {}, skip. tablet: {}",
+ replica.getId(), replica.getVersion(), visibleVersion, tabletId);
continue;
}
@@ -591,14 +607,19 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
for (Replica srcReplica : candidates) {
PathSlot slot = backendsWorkingSlots.get(srcReplica.getBackendId());
if (slot == null) {
+ LOG.debug("replica's backend {} does not have working slot, skip. tablet: {}",
+ srcReplica.getBackendId(), tabletId);
continue;
}
long srcPathHash = slot.takeSlot(srcReplica.getPathHash());
- if (srcPathHash != -1) {
- setSrc(srcReplica);
- return;
+ if (srcPathHash == -1) {
+ LOG.debug("replica's backend {} does not have available slot, skip. tablet: {}",
+ srcReplica.getBackendId(), tabletId);
+ continue;
}
+ setSrc(srcReplica);
+ return;
}
throw new SchedException(Status.SCHEDULE_FAILED, "unable to find source slot");
}
@@ -629,11 +650,15 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
Replica chosenReplica = null;
for (Replica replica : tablet.getReplicas()) {
if (replica.isBad()) {
+ LOG.debug("replica {} is bad, skip. tablet: {}",
+ replica.getId(), tabletId);
continue;
}
Backend be = infoService.getBackend(replica.getBackendId());
if (be == null || !be.isScheduleAvailable()) {
+ LOG.debug("replica's backend {} does not exist or is not scheduler available, skip. tablet: {}",
+ replica.getBackendId(), tabletId);
continue;
}
@@ -644,10 +669,14 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
&& ((replica.getVersion() == visibleVersion)
|| replica.getVersion() > visibleVersion) && replica.getState() != ReplicaState.DECOMMISSION) {
// skip healthy replica
+ LOG.debug("replica {} version {} is healthy, visible version {}, replica state {}, skip. tablet: {}",
+ replica.getId(), replica.getVersion(), visibleVersion, replica.getState(), tabletId);
continue;
}
if (replica.needFurtherRepair()) {
+ LOG.debug("replica {} need further repair, choose it. tablet: {}",
+ replica.getId(), tabletId);
chosenReplica = replica;
break;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index b243980362..c672976b92 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1308,11 +1308,15 @@ public class TabletScheduler extends MasterDaemon {
List<RootPathLoadStatistic> allFitPaths = Lists.newArrayList();
for (BackendLoadStatistic bes : beStatistics) {
if (!bes.isAvailable()) {
+ LOG.debug("backend {} is not available, skip. tablet: {}", bes.getBeId(), tabletCtx.getTabletId());
continue;
}
// exclude BE which already has replica of this tablet or another BE at same host has this replica
if (tabletCtx.filterDestBE(bes.getBeId())) {
+ LOG.debug("backend {} already has replica of this tablet or another BE "
+ + "at same host has this replica, skip. tablet: {}",
+ bes.getBeId(), tabletCtx.getTabletId());
continue;
}
@@ -1320,9 +1324,13 @@ public class TabletScheduler extends MasterDaemon {
// Else, check the tag.
if (forColocate) {
if (!tabletCtx.getColocateBackendsSet().contains(bes.getBeId())) {
+ LOG.debug("backend {} is not in colocate backend set, skip. tablet: {}",
+ bes.getBeId(), tabletCtx.getTabletId());
continue;
}
} else if (!bes.getTag().equals(tag)) {
+ LOG.debug("backend {}'s tag {} is not equal to tablet's tag {}, skip. tablet: {}",
+ bes.getBeId(), bes.getTag(), tag, tabletCtx.getTabletId());
continue;
}
@@ -1331,6 +1339,7 @@ public class TabletScheduler extends MasterDaemon {
resultPaths, tabletCtx.getTabletStatus() != TabletStatus.REPLICA_RELOCATING
/* if REPLICA_RELOCATING, then it is not a supplement task */);
if (!st.ok()) {
+ LOG.debug("unable to find path for tablet: {}. {}", tabletCtx, st);
// This is to solve, when we decommission some BEs with SSD disks,
// if there are no SSD disks on the remaining BEs, it will be impossible to select a
// suitable destination path.
@@ -1358,31 +1367,50 @@ public class TabletScheduler extends MasterDaemon {
// we try to find a path with specified media type, if not find, arbitrarily use one.
for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
if (rootPathLoadStatistic.getStorageMedium() != tabletCtx.getStorageMedium()) {
+ LOG.debug("backend {}'s path {}'s storage medium {} "
+ + "is not equal to tablet's storage medium {}, skip. tablet: {}",
+ rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+ rootPathLoadStatistic.getStorageMedium(), tabletCtx.getStorageMedium(),
+ tabletCtx.getTabletId());
continue;
}
PathSlot slot = backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
if (slot == null) {
+ LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: {}",
+ rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
continue;
}
long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
- if (pathHash != -1) {
- return rootPathLoadStatistic;
+ if (pathHash == -1) {
+ LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: {}",
+ rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
+ continue;
}
+ return rootPathLoadStatistic;
}
// no root path with specified media type is found, get arbitrary one.
for (RootPathLoadStatistic rootPathLoadStatistic : allFitPaths) {
PathSlot slot = backendsWorkingSlots.get(rootPathLoadStatistic.getBeId());
if (slot == null) {
+ LOG.debug("backend {}'s path {}'s slot is null, skip. tablet: {}",
+ rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
continue;
}
long pathHash = slot.takeSlot(rootPathLoadStatistic.getPathHash());
- if (pathHash != -1) {
- return rootPathLoadStatistic;
+ if (pathHash == -1) {
+ LOG.debug("backend {}'s path {}'s slot is full, skip. tablet: {}",
+ rootPathLoadStatistic.getBeId(), rootPathLoadStatistic.getPathHash(),
+ tabletCtx.getTabletId());
+ continue;
}
+ return rootPathLoadStatistic;
}
throw new SchedException(Status.SCHEDULE_FAILED, "unable to find dest path which can be fit in");
@@ -1709,10 +1737,12 @@ public class TabletScheduler extends MasterDaemon {
Slot slot = pathSlots.get(pathHash);
if (slot == null) {
+ LOG.debug("path {} is not exist", pathHash);
return -1;
}
slot.rectify();
if (slot.available <= 0) {
+ LOG.debug("path {} has no available slot", pathHash);
return -1;
}
slot.available--;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org