You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ratis.apache.org by sz...@apache.org on 2020/12/11 01:16:00 UTC

[incubator-ratis] branch master updated: RATIS-1216. State machine preAppend callback can throw exception without having leader to step down (#344). Contributed by Ethan Rose

This is an automated email from the ASF dual-hosted git repository.

szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ratis.git


The following commit(s) were added to refs/heads/master by this push:
     new a562d9e  RATIS-1216. State machine preAppend callback can throw exception without having leader to step down (#344).  Contributed by Ethan Rose
a562d9e is described below

commit a562d9e02289131da2cda98ecf547f7694e0a024
Author: Ethan Rose <33...@users.noreply.github.com>
AuthorDate: Thu Dec 10 20:15:52 2020 -0500

    RATIS-1216. State machine preAppend callback can throw exception without having leader to step down (#344).  Contributed by Ethan Rose
---
 .../protocol/exceptions/StateMachineException.java |  27 +++++
 .../TestPreAppendLeaderStepDownWithHadoopRpc.java  |  22 +---
 .../apache/ratis/server/impl/RaftServerImpl.java   |   2 +-
 .../org/apache/ratis/server/raftlog/RaftLog.java   |   2 +
 .../server/impl/PreAppendLeaderStepDownTest.java   | 111 +++++++++++++++++++++
 .../grpc/TestPreAppendLeaderStepDownWithGrpc.java  |  22 +---
 .../TestPreAppendLeaderStepDownWithNetty.java      |  22 +---
 ...estPreAppendLeaderStepDownWithSimulatedRpc.java |  22 +---
 8 files changed, 161 insertions(+), 69 deletions(-)

diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java b/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
index 6a97203..ccee451 100644
--- a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
+++ b/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
@@ -20,18 +20,45 @@ package org.apache.ratis.protocol.exceptions;
 import org.apache.ratis.protocol.RaftGroupMemberId;
 
 public class StateMachineException extends RaftException {
+  private final boolean leaderShouldStepDown;
+
   public StateMachineException(RaftGroupMemberId serverId, Throwable cause) {
     // cause.getMessage is added to this exception message as the exception received through
     // RPC call contains similar message but Simulated RPC doesn't. Adding the message
     // from cause to this exception makes it consistent across simulated and other RPC implementations.
     super(cause.getClass().getName() + " from Server " + serverId + ": " + cause.getMessage(), cause);
+    this.leaderShouldStepDown = true;
   }
 
   public StateMachineException(String msg) {
     super(msg);
+    this.leaderShouldStepDown = true;
   }
 
   public StateMachineException(String message, Throwable cause) {
     super(message, cause);
+    this.leaderShouldStepDown = true;
+  }
+
+  public StateMachineException(RaftGroupMemberId serverId, Throwable cause, boolean leaderShouldStepDown) {
+    // cause.getMessage is added to this exception message as the exception received through
+    // RPC call contains similar message but Simulated RPC doesn't. Adding the message
+    // from cause to this exception makes it consistent across simulated and other RPC implementations.
+    super(cause.getClass().getName() + " from Server " + serverId + ": " + cause.getMessage(), cause);
+    this.leaderShouldStepDown = leaderShouldStepDown;
+  }
+
+  public StateMachineException(String msg, boolean leaderShouldStepDown) {
+    super(msg);
+    this.leaderShouldStepDown = leaderShouldStepDown;
+  }
+
+  public StateMachineException(String message, Throwable cause, boolean leaderShouldStepDown) {
+    super(message, cause);
+    this.leaderShouldStepDown = leaderShouldStepDown;
+  }
+
+  public boolean leaderShouldStepDown() {
+    return leaderShouldStepDown;
   }
 }
diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java b/ratis-hadoop/src/test/java/org/apache/ratis/hadooprpc/TestPreAppendLeaderStepDownWithHadoopRpc.java
similarity index 50%
copy from ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
copy to ratis-hadoop/src/test/java/org/apache/ratis/hadooprpc/TestPreAppendLeaderStepDownWithHadoopRpc.java
index 6a97203..e5e4e1d 100644
--- a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
+++ b/ratis-hadoop/src/test/java/org/apache/ratis/hadooprpc/TestPreAppendLeaderStepDownWithHadoopRpc.java
@@ -15,23 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.ratis.protocol.exceptions;
+package org.apache.ratis.hadooprpc;
 
-import org.apache.ratis.protocol.RaftGroupMemberId;
+import org.apache.ratis.server.impl.PreAppendLeaderStepDownTest;
 
-public class StateMachineException extends RaftException {
-  public StateMachineException(RaftGroupMemberId serverId, Throwable cause) {
-    // cause.getMessage is added to this exception message as the exception received through
-    // RPC call contains similar message but Simulated RPC doesn't. Adding the message
-    // from cause to this exception makes it consistent across simulated and other RPC implementations.
-    super(cause.getClass().getName() + " from Server " + serverId + ": " + cause.getMessage(), cause);
-  }
-
-  public StateMachineException(String msg) {
-    super(msg);
-  }
-
-  public StateMachineException(String message, Throwable cause) {
-    super(message, cause);
-  }
+public class TestPreAppendLeaderStepDownWithHadoopRpc
+    extends PreAppendLeaderStepDownTest<MiniRaftClusterWithHadoopRpc>
+    implements MiniRaftClusterWithHadoopRpc.Factory.Get {
 }
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
index 6cccce0..2a1f536 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
@@ -671,7 +671,7 @@ class RaftServerImpl implements RaftServer.Division,
         RaftClientReply exceptionReply = newExceptionReply(request, e);
         cacheEntry.failWithReply(exceptionReply);
         // leader will step down here
-        if (getInfo().isLeader()) {
+        if (e.leaderShouldStepDown() && getInfo().isLeader()) {
           leaderState.submitStepDownEvent(LeaderState.StepDownReason.STATE_MACHINE_EXCEPTION);
         }
         return CompletableFuture.completedFuture(exceptionReply);
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java
index 1e2d11b..1a5fcb9 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java
@@ -212,6 +212,8 @@ public abstract class RaftLog implements RaftLogSequentialOps, Closeable {
       // the SM wants to attach a logic depending on ordered execution in the log commit order.
       try {
         operation = operation.preAppendTransaction();
+      } catch (StateMachineException e) {
+        throw e;
       } catch (IOException e) {
         throw new StateMachineException(memberId, e);
       }
diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/PreAppendLeaderStepDownTest.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/PreAppendLeaderStepDownTest.java
new file mode 100644
index 0000000..adebb1f
--- /dev/null
+++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/PreAppendLeaderStepDownTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ratis.server.impl;
+
+import org.apache.log4j.Level;
+import org.apache.ratis.BaseTest;
+import org.apache.ratis.RaftTestUtil;
+import org.apache.ratis.client.RaftClient;
+import org.apache.ratis.client.RaftClientRpc;
+import org.apache.ratis.conf.RaftProperties;
+import org.apache.ratis.protocol.RaftClientReply;
+import org.apache.ratis.protocol.RaftClientRequest;
+import org.apache.ratis.protocol.exceptions.StateMachineException;
+import org.apache.ratis.server.RaftServer;
+import org.apache.ratis.server.raftlog.RaftLog;
+import org.apache.ratis.statemachine.SimpleStateMachine4Testing;
+import org.apache.ratis.statemachine.StateMachine;
+import org.apache.ratis.statemachine.TransactionContext;
+import org.apache.ratis.util.Log4jUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * Tests the leader step down flag in {@link StateMachineException}, which
+ * determines whether the leader should step down or not when it receives the
+ * exception from {@link StateMachine#preAppendTransaction}.
+ */
+public abstract class PreAppendLeaderStepDownTest<CLUSTER extends MiniRaftCluster>
+    extends BaseTest implements MiniRaftCluster.Factory.Get<CLUSTER> {
+  {
+    Log4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG);
+    Log4jUtils.setLogLevel(RaftLog.LOG, Level.DEBUG);
+    Log4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG);
+  }
+
+  private static volatile boolean leaderShouldStepDown = false;
+
+  protected static class StateMachineWithException extends
+      SimpleStateMachine4Testing {
+
+    @Override
+    public TransactionContext preAppendTransaction(TransactionContext trx)
+        throws IOException {
+      throw new StateMachineException("Fake Exception in preAppend", leaderShouldStepDown);
+    }
+  }
+
+  {
+    final RaftProperties prop = getProperties();
+    prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, StateMachineWithException.class, StateMachine.class);
+  }
+
+  @Test
+  public void testLeaderStepDown() throws Exception {
+    leaderShouldStepDown = true;
+    runWithNewCluster(3, this::runTestLeaderStepDown);
+  }
+
+  @Test
+  public void testNoLeaderStepDown() throws Exception {
+    leaderShouldStepDown = false;
+    runWithNewCluster(3, this::runTestLeaderStepDown);
+  }
+
+  private void runTestLeaderStepDown(CLUSTER cluster) throws Exception {
+    final RaftServer.Division oldLeader = RaftTestUtil.waitForLeader(cluster);
+    try (final RaftClient client = cluster.createClient(oldLeader.getId())) {
+      final RaftClientRpc rpc = client.getClientRpc();
+      final long callId = 999;
+      final RaftTestUtil.SimpleMessage message = new RaftTestUtil.SimpleMessage("message");
+      RaftClientRequest r = cluster.newRaftClientRequest(client.getId(), oldLeader.getId(), callId, message);
+
+      long oldTerm =
+          RaftTestUtil.waitForLeader(cluster).getRaftLog().getLastEntryTermIndex().getTerm();
+
+      // Cannot check the state machine exception attached to the reply for the
+      // leader step down flag, because that flag is lost when the exception
+      // is converted to and from a protobuf to pass back from the cluster to
+      // the client.
+      rpc.sendRequest(r);
+
+      long newTerm =
+          RaftTestUtil.waitForLeader(cluster).getRaftLog().getLastEntryTermIndex().getTerm();
+
+      if (leaderShouldStepDown) {
+        Assert.assertTrue(newTerm > oldTerm);
+      } else {
+        Assert.assertEquals(newTerm, oldTerm);
+      }
+
+      cluster.shutdown();
+    }
+  }
+}
diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestPreAppendLeaderStepDownWithGrpc.java
similarity index 50%
copy from ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
copy to ratis-test/src/test/java/org/apache/ratis/grpc/TestPreAppendLeaderStepDownWithGrpc.java
index 6a97203..ff9bd43 100644
--- a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
+++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestPreAppendLeaderStepDownWithGrpc.java
@@ -15,23 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.ratis.protocol.exceptions;
+package org.apache.ratis.grpc;
 
-import org.apache.ratis.protocol.RaftGroupMemberId;
+import org.apache.ratis.server.impl.PreAppendLeaderStepDownTest;
 
-public class StateMachineException extends RaftException {
-  public StateMachineException(RaftGroupMemberId serverId, Throwable cause) {
-    // cause.getMessage is added to this exception message as the exception received through
-    // RPC call contains similar message but Simulated RPC doesn't. Adding the message
-    // from cause to this exception makes it consistent across simulated and other RPC implementations.
-    super(cause.getClass().getName() + " from Server " + serverId + ": " + cause.getMessage(), cause);
-  }
-
-  public StateMachineException(String msg) {
-    super(msg);
-  }
-
-  public StateMachineException(String message, Throwable cause) {
-    super(message, cause);
-  }
+public class TestPreAppendLeaderStepDownWithGrpc
+    extends PreAppendLeaderStepDownTest<MiniRaftClusterWithGrpc>
+    implements MiniRaftClusterWithGrpc.FactoryGet {
 }
diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestPreAppendLeaderStepDownWithNetty.java
similarity index 50%
copy from ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
copy to ratis-test/src/test/java/org/apache/ratis/netty/TestPreAppendLeaderStepDownWithNetty.java
index 6a97203..0420af4 100644
--- a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
+++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestPreAppendLeaderStepDownWithNetty.java
@@ -15,23 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.ratis.protocol.exceptions;
+package org.apache.ratis.netty;
 
-import org.apache.ratis.protocol.RaftGroupMemberId;
+import org.apache.ratis.server.impl.PreAppendLeaderStepDownTest;
 
-public class StateMachineException extends RaftException {
-  public StateMachineException(RaftGroupMemberId serverId, Throwable cause) {
-    // cause.getMessage is added to this exception message as the exception received through
-    // RPC call contains similar message but Simulated RPC doesn't. Adding the message
-    // from cause to this exception makes it consistent across simulated and other RPC implementations.
-    super(cause.getClass().getName() + " from Server " + serverId + ": " + cause.getMessage(), cause);
-  }
-
-  public StateMachineException(String msg) {
-    super(msg);
-  }
-
-  public StateMachineException(String message, Throwable cause) {
-    super(message, cause);
-  }
+public class TestPreAppendLeaderStepDownWithNetty
+    extends PreAppendLeaderStepDownTest<MiniRaftClusterWithNetty>
+    implements MiniRaftClusterWithNetty.FactoryGet {
 }
diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestPreAppendLeaderStepDownWithSimulatedRpc.java
similarity index 50%
copy from ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
copy to ratis-test/src/test/java/org/apache/ratis/server/simulation/TestPreAppendLeaderStepDownWithSimulatedRpc.java
index 6a97203..0f317a3 100644
--- a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/StateMachineException.java
+++ b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestPreAppendLeaderStepDownWithSimulatedRpc.java
@@ -15,23 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.ratis.protocol.exceptions;
+package org.apache.ratis.server.simulation;
 
-import org.apache.ratis.protocol.RaftGroupMemberId;
+import org.apache.ratis.server.impl.PreAppendLeaderStepDownTest;
 
-public class StateMachineException extends RaftException {
-  public StateMachineException(RaftGroupMemberId serverId, Throwable cause) {
-    // cause.getMessage is added to this exception message as the exception received through
-    // RPC call contains similar message but Simulated RPC doesn't. Adding the message
-    // from cause to this exception makes it consistent across simulated and other RPC implementations.
-    super(cause.getClass().getName() + " from Server " + serverId + ": " + cause.getMessage(), cause);
-  }
-
-  public StateMachineException(String msg) {
-    super(msg);
-  }
-
-  public StateMachineException(String message, Throwable cause) {
-    super(message, cause);
-  }
+public class TestPreAppendLeaderStepDownWithSimulatedRpc
+    extends PreAppendLeaderStepDownTest<MiniRaftClusterWithSimulatedRpc>
+    implements MiniRaftClusterWithSimulatedRpc.FactoryGet {
 }