You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2020/07/22 14:42:41 UTC
[incubator-doris] branch master updated: [TabletRepair] Delete bad
replicas when no BE can be used to create new replica
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new e4f5a29 [TabletRepair] Delete bad replicas when no BE can be used to create new replica
e4f5a29 is described below
commit e4f5a2936bfecf7dd0465706f0cb2112fb852be6
Author: Lijia Liu <li...@yeah.net>
AuthorDate: Wed Jul 22 22:42:31 2020 +0800
[TabletRepair] Delete bad replicas when no BE can be used to create new replica
When there is no available BE for relocating replicas, delete the bad replica first.
---
.../java/org/apache/doris/catalog/OlapTable.java | 4 +--
.../main/java/org/apache/doris/catalog/Tablet.java | 33 ++++++++++++++++------
.../java/org/apache/doris/clone/TabletChecker.java | 4 +--
.../org/apache/doris/clone/TabletSchedCtx.java | 4 +--
.../org/apache/doris/clone/TabletScheduler.java | 4 +--
.../apache/doris/common/proc/StatisticProcDir.java | 18 ++++++------
.../org/apache/doris/master/ReportHandler.java | 4 +--
7 files changed, 42 insertions(+), 29 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index f8f4423..10603eb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1229,7 +1229,7 @@ public class OlapTable extends Table {
}
public boolean isStable(SystemInfoService infoService, TabletScheduler tabletScheduler, String clusterName) {
- int availableBackendsNum = infoService.getClusterBackendIds(clusterName, true).size();
+ List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(clusterName, true);
for (Partition partition : idToPartition.values()) {
long visibleVersion = partition.getVisibleVersion();
long visibleVersionHash = partition.getVisibleVersionHash();
@@ -1242,7 +1242,7 @@ public class OlapTable extends Table {
Pair<TabletStatus, TabletSchedCtx.Priority> statusPair = tablet.getHealthStatusWithPriority(
infoService, clusterName, visibleVersion, visibleVersionHash, replicationNum,
- availableBackendsNum);
+ aliveBeIdsInCluster);
if (statusPair.first != TabletStatus.HEALTHY) {
LOG.info("table {} is not stable because tablet {} status is {}. replicas: {}",
id, tablet.getId(), statusPair.first, tablet.getReplicas());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index d8eb691..87e9309 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -42,6 +42,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
+import java.util.stream.Collectors;
/**
* This class represents the olap tablet related metadata.
@@ -407,7 +408,7 @@ public class Tablet extends MetaObject implements Writable {
public Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriority(
SystemInfoService systemInfoService, String clusterName,
long visibleVersion, long visibleVersionHash, int replicationNum,
- int availableBackendsNum) {
+ List<Long> aliveBeIdsInCluster) {
int alive = 0;
int aliveAndVersionComplete = 0;
@@ -453,15 +454,16 @@ public class Tablet extends MetaObject implements Writable {
}
// 1. alive replicas are not enough
- if (alive < replicationNum && replicas.size() >= availableBackendsNum
- && availableBackendsNum >= replicationNum && replicationNum > 1) {
+ int aliveBackendsNum = aliveBeIdsInCluster.size();
+ if (alive < replicationNum && replicas.size() >= aliveBackendsNum
+ && aliveBackendsNum >= replicationNum && replicationNum > 1) {
// there are not enough backends for us to create a new replica, so we have to delete an existing replica,
// so that a backend becomes available for us to create a new replica.
// And if there is only one replica, we will not handle it (it may need human intervention).
// condition explain:
// 1. alive < replicationNum: replica is missing or bad
- // 2. replicas.size() >= availableBackendsNum: the existing replicas occupies all available backends
- // 3. availableBackendsNum >= replicationNum: make sure after deleting, there will be at least one backend for new replica.
+ // 2. replicas.size() >= aliveBackendsNum: the existing replicas occupy all available backends
+ // 3. aliveBackendsNum >= replicationNum: make sure after deleting, there will be at least one backend for new replica.
// 4. replicationNum > 1: if replication num is set to 1, do not delete any replica, for safety reason
return Pair.create(TabletStatus.FORCE_REDUNDANT, TabletSchedCtx.Priority.VERY_HIGH);
} else if (alive < (replicationNum / 2) + 1) {
@@ -484,10 +486,23 @@ public class Tablet extends MetaObject implements Writable {
}
// 3. replica is under relocating
- if (stable < (replicationNum / 2) + 1) {
- return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.NORMAL);
- } else if (stable < replicationNum) {
- return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.LOW);
+ if (stable < replicationNum) {
+ List<Long> replicaBeIds = replicas.stream()
+ .map(Replica::getBackendId).collect(Collectors.toList());
+ List<Long> availableBeIds = aliveBeIdsInCluster.stream()
+ .filter(systemInfoService::checkBackendAvailable)
+ .collect(Collectors.toList());
+ if (replicaBeIds.containsAll(availableBeIds)
+ && availableBeIds.size() >= replicationNum
+ && replicationNum > 1) { // No BE can be chosen to create a new replica
+ return Pair.create(TabletStatus.FORCE_REDUNDANT,
+ stable < (replicationNum / 2) + 1 ? TabletSchedCtx.Priority.NORMAL : TabletSchedCtx.Priority.LOW);
+ }
+ if (stable < (replicationNum / 2) + 1) {
+ return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.NORMAL);
+ } else if (stable < replicationNum) {
+ return Pair.create(TabletStatus.REPLICA_RELOCATING, TabletSchedCtx.Priority.LOW);
+ }
}
// 4. healthy replicas in cluster are not enough
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java
index 850fe04..952f564 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java
@@ -206,7 +206,7 @@ public class TabletChecker extends MasterDaemon {
db.readLock();
try {
- int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
+ List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
for (Table table : db.getTables()) {
if (!table.needSchedule()) {
continue;
@@ -239,7 +239,7 @@ public class TabletChecker extends MasterDaemon {
partition.getVisibleVersion(),
partition.getVisibleVersionHash(),
olapTbl.getPartitionInfo().getReplicationNum(partition.getId()),
- availableBackendsNum);
+ aliveBeIdsInCluster);
if (statusWithPrio.first == TabletStatus.HEALTHY) {
// Only set last status check time when status is healthy.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 059306f..98e1dab 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -799,11 +799,11 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
throw new SchedException(Status.UNRECOVERABLE, "tablet does not exist");
}
- int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
+ List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partitionId);
Pair<TabletStatus, TabletSchedCtx.Priority> pair = tablet.getHealthStatusWithPriority(
infoService, db.getClusterName(), visibleVersion, visibleVersionHash, replicationNum,
- availableBackendsNum);
+ aliveBeIdsInCluster);
if (pair.first == TabletStatus.HEALTHY) {
throw new SchedException(Status.FINISHED, "tablet is healthy");
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 4ecfe56..83fa29c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -495,13 +495,13 @@ public class TabletScheduler extends MasterDaemon {
statusPair = Pair.create(st, Priority.HIGH);
tabletCtx.setColocateGroupBackendIds(backendsSet);
} else {
- int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
+ List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
statusPair = tablet.getHealthStatusWithPriority(
infoService, tabletCtx.getCluster(),
partition.getVisibleVersion(),
partition.getVisibleVersionHash(),
tbl.getPartitionInfo().getReplicationNum(partition.getId()),
- availableBackendsNum);
+ aliveBeIdsInCluster);
}
if (tabletCtx.getType() == TabletSchedCtx.Type.BALANCE && tableState != OlapTableState.NORMAL) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java
index e75a3fe..29f48c7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java
@@ -17,6 +17,10 @@
package org.apache.doris.common.proc;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Multimap;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.MaterializedIndex;
@@ -34,19 +38,13 @@ import org.apache.doris.common.util.ListComparator;
import org.apache.doris.system.SystemInfoService;
import org.apache.doris.task.AgentTaskQueue;
import org.apache.doris.thrift.TTaskType;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Multimap;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
public class StatisticProcDir implements ProcDirInterface {
public static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>()
.add("DbId").add("DbName").add("TableNum").add("PartitionNum")
@@ -108,7 +106,7 @@ public class StatisticProcDir implements ProcDirInterface {
}
++totalDbNum;
- int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
+ List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
db.readLock();
try {
int dbTableNum = 0;
@@ -137,7 +135,7 @@ public class StatisticProcDir implements ProcDirInterface {
Pair<TabletStatus, Priority> res = tablet.getHealthStatusWithPriority(
infoService, db.getClusterName(),
partition.getVisibleVersion(), partition.getVisibleVersionHash(),
- replicationNum, availableBackendsNum);
+ replicationNum, aliveBeIdsInCluster);
// here we treat REDUNDANT as HEALTHY, for user friendly.
if (res.first != TabletStatus.HEALTHY && res.first != TabletStatus.REDUNDANT
diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
index c8c1d18..8d40812 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
@@ -1027,10 +1027,10 @@ public class ReportHandler extends Daemon {
return;
}
- int availableBackendsNum = infoService.getClusterBackendIds(db.getClusterName(), true).size();
+ List<Long> aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true);
Pair<TabletStatus, TabletSchedCtx.Priority> status = tablet.getHealthStatusWithPriority(infoService,
db.getClusterName(), visibleVersion, visibleVersionHash,
- replicationNum, availableBackendsNum);
+ replicationNum, aliveBeIdsInCluster);
if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING) {
long lastFailedVersion = -1L;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org