You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/15 11:59:06 UTC
[doris] branch master updated: [fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6751e5b23c [fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827)
6751e5b23c is described below
commit 6751e5b23c8bee302a6802638bd78739128b4355
Author: caiconghui <55...@users.noreply.github.com>
AuthorDate: Fri Jul 15 19:59:00 2022 +0800
[fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827)
---
.../org/apache/doris/alter/MaterializedViewHandler.java | 10 ++++++----
.../java/org/apache/doris/clone/TabletSchedCtx.java | 17 ++++++++---------
.../java/org/apache/doris/clone/TabletScheduler.java | 14 ++++----------
.../src/main/java/org/apache/doris/common/Config.java | 2 +-
.../apache/doris/clone/TabletReplicaTooSlowTest.java | 3 ---
5 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java
index 9612fbb83d..1d2b9a7cf6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java
@@ -37,6 +37,7 @@ import org.apache.doris.catalog.OlapTable.OlapTableState;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Replica;
+import org.apache.doris.catalog.Replica.ReplicaState;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.TabletInvertedIndex;
@@ -384,18 +385,19 @@ public class MaterializedViewHandler extends AlterHandler {
for (Replica baseReplica : baseReplicas) {
long mvReplicaId = catalog.getNextId();
long backendId = baseReplica.getBackendId();
- if (baseReplica.getState() == Replica.ReplicaState.CLONE
- || baseReplica.getState() == Replica.ReplicaState.DECOMMISSION
+ if (baseReplica.getState() == ReplicaState.CLONE
+ || baseReplica.getState() == ReplicaState.DECOMMISSION
+ || baseReplica.getState() == ReplicaState.COMPACTION_TOO_SLOW
|| baseReplica.getLastFailedVersion() > 0) {
LOG.info("base replica {} of tablet {} state is {}, and last failed version is {},"
+ " skip creating rollup replica", baseReplica.getId(), baseTabletId,
baseReplica.getState(), baseReplica.getLastFailedVersion());
continue;
}
- Preconditions.checkState(baseReplica.getState() == Replica.ReplicaState.NORMAL,
+ Preconditions.checkState(baseReplica.getState() == ReplicaState.NORMAL,
baseReplica.getState());
// replica's init state is ALTER, so that tablet report process will ignore its report
- Replica mvReplica = new Replica(mvReplicaId, backendId, Replica.ReplicaState.ALTER,
+ Replica mvReplica = new Replica(mvReplicaId, backendId, ReplicaState.ALTER,
Partition.PARTITION_INIT_VERSION, mvSchemaHash);
newTablet.addReplica(mvReplica);
healthyReplicaNum++;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 57a77bb33f..87ed910fd7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -527,28 +527,27 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
public boolean compactionRecovered() {
Replica chosenReplica = null;
- long maxVersionCount = -1;
- long minVersionCount = Integer.MAX_VALUE;
+ long maxVersionCount = Integer.MIN_VALUE;
for (Replica replica : tablet.getReplicas()) {
if (replica.getVersionCount() > maxVersionCount) {
maxVersionCount = replica.getVersionCount();
chosenReplica = replica;
}
- if (replica.getVersionCount() < minVersionCount) {
- minVersionCount = replica.getVersionCount();
- }
}
boolean recovered = false;
for (Replica replica : tablet.getReplicas()) {
- if (replica.isAlive() && replica.tooSlow() && !chosenReplica.equals(replica)) {
- chosenReplica.setState(ReplicaState.NORMAL);
- recovered = true;
+ if (replica.isAlive() && replica.tooSlow() && (!replica.equals(chosenReplica)
+ || replica.getVersionCount() < Config.min_version_count_indicate_replica_compaction_too_slow)) {
+ if (chosenReplica != null) {
+ chosenReplica.setState(ReplicaState.NORMAL);
+ recovered = true;
+ }
}
}
return recovered;
}
- // database lock should be held.
+ // table lock should be held.
// If exceptBeId != -1, should not choose src replica with same BE id as exceptBeId
public void chooseSrcReplica(Map<Long, PathSlot> backendsWorkingSlots, long exceptBeId) throws SchedException {
/*
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index cd731bb0ff..7fb1b75481 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1071,9 +1071,7 @@ public class TabletScheduler extends MasterDaemon {
*/
private void handleReplicaTooSlow(TabletSchedCtx tabletCtx) throws SchedException {
Replica chosenReplica = null;
- Replica minReplica = null;
long maxVersionCount = -1;
- long minVersionCount = Integer.MAX_VALUE;
int normalReplicaCount = 0;
for (Replica replica : tabletCtx.getReplicas()) {
if (replica.isAlive() && !replica.tooSlow()) {
@@ -1083,20 +1081,16 @@ public class TabletScheduler extends MasterDaemon {
maxVersionCount = replica.getVersionCount();
chosenReplica = replica;
}
- if (replica.getVersionCount() < minVersionCount) {
- minVersionCount = replica.getVersionCount();
- minReplica = replica;
- }
}
-
- if (chosenReplica != null && !chosenReplica.equals(minReplica) && minReplica.isAlive() && !minReplica.tooSlow()
- && normalReplicaCount >= 1) {
+ if (chosenReplica != null && chosenReplica.isAlive() && !chosenReplica.tooSlow()
+ && chosenReplica.getVersionCount() > Config.min_version_count_indicate_replica_compaction_too_slow
+ && normalReplicaCount - 1 >= tabletCtx.getReplicas().size() / 2 + 1) {
chosenReplica.setState(ReplicaState.COMPACTION_TOO_SLOW);
LOG.info("set replica id :{} tablet id: {}, backend id: {} to COMPACTION_TOO_SLOW",
chosenReplica.getId(), tabletCtx.getTablet().getId(), chosenReplica.getBackendId());
throw new SchedException(Status.FINISHED, "set replica to COMPACTION_TOO_SLOW");
}
- throw new SchedException(Status.FINISHED, "No replica too slow");
+ throw new SchedException(Status.FINISHED, "No replica set to COMPACTION_TOO_SLOW");
}
private void deleteReplicaInternal(TabletSchedCtx tabletCtx,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
index 5aaac04a42..749322e48a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
@@ -1587,7 +1587,7 @@ public class Config extends ConfigBase {
public static int min_version_count_indicate_replica_compaction_too_slow = 200;
/**
- * The valid ratio threshold of the difference between the version count of the slowest replicaand the fastest
+ * The valid ratio threshold of the difference between the version count of the slowest replica and the fastest
* replica. If repair_slow_replica is set to true, it is used to determine whether to repair the slowest replica
*/
@ConfField(mutable = true, masterOnly = true)
diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java
index b298723ed8..1fa209f8f2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java
@@ -139,9 +139,6 @@ public class TabletReplicaTooSlowTest {
Backend be = Catalog.getCurrentSystemInfo().getBackend(beId);
List<Long> pathHashes = be.getDisks().values().stream()
.map(DiskInfo::getPathHash).collect(Collectors.toList());
- if (be == null) {
- continue;
- }
Replica replica = cell.getValue();
replica.setVersionCount(versionCount);
versionCount = versionCount + 200;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org