You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/15 11:59:06 UTC

[doris] branch master updated: [fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6751e5b23c [fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827)
6751e5b23c is described below

commit 6751e5b23c8bee302a6802638bd78739128b4355
Author: caiconghui <55...@users.noreply.github.com>
AuthorDate: Fri Jul 15 19:59:00 2022 +0800

    [fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827)
---
 .../org/apache/doris/alter/MaterializedViewHandler.java | 10 ++++++----
 .../java/org/apache/doris/clone/TabletSchedCtx.java     | 17 ++++++++---------
 .../java/org/apache/doris/clone/TabletScheduler.java    | 14 ++++----------
 .../src/main/java/org/apache/doris/common/Config.java   |  2 +-
 .../apache/doris/clone/TabletReplicaTooSlowTest.java    |  3 ---
 5 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java
index 9612fbb83d..1d2b9a7cf6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java
@@ -37,6 +37,7 @@ import org.apache.doris.catalog.OlapTable.OlapTableState;
 import org.apache.doris.catalog.Partition;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.Replica;
+import org.apache.doris.catalog.Replica.ReplicaState;
 import org.apache.doris.catalog.Table;
 import org.apache.doris.catalog.Tablet;
 import org.apache.doris.catalog.TabletInvertedIndex;
@@ -384,18 +385,19 @@ public class MaterializedViewHandler extends AlterHandler {
                 for (Replica baseReplica : baseReplicas) {
                     long mvReplicaId = catalog.getNextId();
                     long backendId = baseReplica.getBackendId();
-                    if (baseReplica.getState() == Replica.ReplicaState.CLONE
-                            || baseReplica.getState() == Replica.ReplicaState.DECOMMISSION
+                    if (baseReplica.getState() == ReplicaState.CLONE
+                            || baseReplica.getState() == ReplicaState.DECOMMISSION
+                            || baseReplica.getState() == ReplicaState.COMPACTION_TOO_SLOW
                             || baseReplica.getLastFailedVersion() > 0) {
                         LOG.info("base replica {} of tablet {} state is {}, and last failed version is {},"
                                         + " skip creating rollup replica", baseReplica.getId(), baseTabletId,
                                 baseReplica.getState(), baseReplica.getLastFailedVersion());
                         continue;
                     }
-                    Preconditions.checkState(baseReplica.getState() == Replica.ReplicaState.NORMAL,
+                    Preconditions.checkState(baseReplica.getState() == ReplicaState.NORMAL,
                             baseReplica.getState());
                     // replica's init state is ALTER, so that tablet report process will ignore its report
-                    Replica mvReplica = new Replica(mvReplicaId, backendId, Replica.ReplicaState.ALTER,
+                    Replica mvReplica = new Replica(mvReplicaId, backendId, ReplicaState.ALTER,
                             Partition.PARTITION_INIT_VERSION, mvSchemaHash);
                     newTablet.addReplica(mvReplica);
                     healthyReplicaNum++;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index 57a77bb33f..87ed910fd7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -527,28 +527,27 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
 
     public boolean compactionRecovered() {
         Replica chosenReplica = null;
-        long maxVersionCount = -1;
-        long minVersionCount = Integer.MAX_VALUE;
+        long maxVersionCount = Integer.MIN_VALUE;
         for (Replica replica : tablet.getReplicas()) {
             if (replica.getVersionCount() > maxVersionCount) {
                 maxVersionCount = replica.getVersionCount();
                 chosenReplica = replica;
             }
-            if (replica.getVersionCount() < minVersionCount) {
-                minVersionCount = replica.getVersionCount();
-            }
         }
         boolean recovered = false;
         for (Replica replica : tablet.getReplicas()) {
-            if (replica.isAlive() && replica.tooSlow() && !chosenReplica.equals(replica)) {
-                chosenReplica.setState(ReplicaState.NORMAL);
-                recovered = true;
+            if (replica.isAlive() && replica.tooSlow() && (!replica.equals(chosenReplica)
+                    || replica.getVersionCount() < Config.min_version_count_indicate_replica_compaction_too_slow)) {
+                if (chosenReplica != null) {
+                    chosenReplica.setState(ReplicaState.NORMAL);
+                    recovered = true;
+                }
             }
         }
         return recovered;
     }
 
-    // database lock should be held.
+    // table lock should be held.
     // If exceptBeId != -1, should not choose src replica with same BE id as exceptBeId
     public void chooseSrcReplica(Map<Long, PathSlot> backendsWorkingSlots, long exceptBeId) throws SchedException {
         /*
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index cd731bb0ff..7fb1b75481 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1071,9 +1071,7 @@ public class TabletScheduler extends MasterDaemon {
      */
     private void handleReplicaTooSlow(TabletSchedCtx tabletCtx) throws SchedException {
         Replica chosenReplica = null;
-        Replica minReplica = null;
         long maxVersionCount = -1;
-        long minVersionCount = Integer.MAX_VALUE;
         int normalReplicaCount = 0;
         for (Replica replica : tabletCtx.getReplicas()) {
             if (replica.isAlive() && !replica.tooSlow()) {
@@ -1083,20 +1081,16 @@ public class TabletScheduler extends MasterDaemon {
                 maxVersionCount = replica.getVersionCount();
                 chosenReplica = replica;
             }
-            if (replica.getVersionCount() < minVersionCount) {
-                minVersionCount = replica.getVersionCount();
-                minReplica = replica;
-            }
         }
-
-        if (chosenReplica != null && !chosenReplica.equals(minReplica) && minReplica.isAlive() && !minReplica.tooSlow()
-                && normalReplicaCount >= 1) {
+        if (chosenReplica != null && chosenReplica.isAlive() && !chosenReplica.tooSlow()
+                && chosenReplica.getVersionCount() > Config.min_version_count_indicate_replica_compaction_too_slow
+                && normalReplicaCount - 1 >= tabletCtx.getReplicas().size() / 2 + 1) {
             chosenReplica.setState(ReplicaState.COMPACTION_TOO_SLOW);
             LOG.info("set replica id :{} tablet id: {}, backend id: {} to COMPACTION_TOO_SLOW",
                     chosenReplica.getId(), tabletCtx.getTablet().getId(), chosenReplica.getBackendId());
             throw new SchedException(Status.FINISHED, "set replica to COMPACTION_TOO_SLOW");
         }
-        throw new SchedException(Status.FINISHED, "No replica too slow");
+        throw new SchedException(Status.FINISHED, "No replica set to COMPACTION_TOO_SLOW");
     }
 
     private void deleteReplicaInternal(TabletSchedCtx tabletCtx,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
index 5aaac04a42..749322e48a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
@@ -1587,7 +1587,7 @@ public class Config extends ConfigBase {
     public static int min_version_count_indicate_replica_compaction_too_slow = 200;
 
     /**
-     * The valid ratio threshold of the difference between the version count of the slowest replicaand the fastest
+     * The valid ratio threshold of the difference between the version count of the slowest replica and the fastest
      * replica. If repair_slow_replica is set to true, it is used to determine whether to repair the slowest replica
      */
     @ConfField(mutable = true, masterOnly = true)
diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java
index b298723ed8..1fa209f8f2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java
@@ -139,9 +139,6 @@ public class TabletReplicaTooSlowTest {
             Backend be = Catalog.getCurrentSystemInfo().getBackend(beId);
             List<Long> pathHashes = be.getDisks().values().stream()
                     .map(DiskInfo::getPathHash).collect(Collectors.toList());
-            if (be == null) {
-                continue;
-            }
             Replica replica = cell.getValue();
             replica.setVersionCount(versionCount);
             versionCount = versionCount + 200;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org