You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by er...@apache.org on 2021/06/09 20:39:47 UTC

[ozone] branch HDDS-3698-nonrolling-upgrade updated: HDDS-5109. Track OM prepare intermittent integration test failure. (#2288)

This is an automated email from the ASF dual-hosted git repository.

erose pushed a commit to branch HDDS-3698-nonrolling-upgrade
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-3698-nonrolling-upgrade by this push:
     new 2c0adac  HDDS-5109. Track OM prepare intermittent integration test failure. (#2288)
2c0adac is described below

commit 2c0adac260c20574d4bbdcbf63ef76537522ba80
Author: Ethan Rose <33...@users.noreply.github.com>
AuthorDate: Wed Jun 9 16:39:21 2021 -0400

    HDDS-5109. Track OM prepare intermittent integration test failure. (#2288)
---
 .../feature/how-to-do-a-nonrolling-upgrade.md      |   9 +-
 .../ozone/om/protocol/OzoneManagerProtocol.java    |   2 +-
 .../hadoop/ozone/om/TestOzoneManagerPrepare.java   |  74 +++++----
 .../src/main/proto/OmClientProtocol.proto          |   4 +-
 .../org/apache/hadoop/ozone/om/OzoneManager.java   | 136 +++++++++++-----
 .../hadoop/ozone/om/OzoneManagerPrepareState.java  | 141 ++++++++++-------
 .../ozone/om/request/upgrade/OMPrepareRequest.java | 174 +++++++++++----------
 .../om/ratis/TestOzoneManagerStateMachine.java     |  26 +--
 .../upgrade/TestOMCancelPrepareRequest.java        |   2 +-
 .../om/upgrade/TestOzoneManagerPrepareState.java   |  76 ++++-----
 10 files changed, 391 insertions(+), 253 deletions(-)

diff --git a/hadoop-hdds/docs/content/feature/how-to-do-a-nonrolling-upgrade.md b/hadoop-hdds/docs/content/feature/how-to-do-a-nonrolling-upgrade.md
index ece4816..3881ef3 100644
--- a/hadoop-hdds/docs/content/feature/how-to-do-a-nonrolling-upgrade.md
+++ b/hadoop-hdds/docs/content/feature/how-to-do-a-nonrolling-upgrade.md
@@ -32,7 +32,10 @@ Replace artifacts of all components newer version.
 Start the SCM and DNs in a regular way.
 Start the Ozone Manager using the --upgrade flag.
  
-    ozone --deamon om start --upgrade
+    ozone --daemon om start --upgrade
+
+**IMPORTANT** All OMs must be started with the --upgrade flag.
+    - If only some are started with the flag by mistake, run `ozone admin om -id=<om-sevice-id> cancelprepare`.
 
 ### Finalize SCM and OM individually.
  
@@ -41,9 +44,11 @@ Start the Ozone Manager using the --upgrade flag.
     ozone admin om -id=<service-id> finalizeupgrade
 
 ### Downgrade (instead of finalizing)
+ - Prepare the ozone managers: `ozone admin om -id=<om-sevice-id> prepare`
  - Stop all components (OMs, SCMs & DNs) using an appropriate 'stop' command.
  - Replace artifacts of all components newer version.
  - Start the SCM and DNs in a regular way.
  - Start the Ozone Manager using the '--downgrade' flag.
+    - Same conditions apply to the --upgrade and --downgrade flags.
 
- 
\ No newline at end of file
+ 
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java
index 14a969d..5b98a30 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java
@@ -626,7 +626,7 @@ public interface OzoneManagerProtocol
       throws IOException {
     return PrepareStatusResponse.newBuilder()
         .setCurrentTxnIndex(-1)
-        .setStatus(PrepareStatus.PREPARE_NOT_STARTED)
+        .setStatus(PrepareStatus.NOT_PREPARED)
         .build();
   }
 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java
index 05ba8c4..4008597 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java
@@ -50,6 +50,8 @@ import org.apache.hadoop.test.LambdaTestUtils;
 import org.junit.Assert;
 import org.junit.Ignore;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Test OM prepare against actual mini cluster.
@@ -68,6 +70,9 @@ public class TestOzoneManagerPrepare extends TestOzoneManagerHA {
   private ClientProtocol clientProtocol;
   private ObjectStore store;
 
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestOzoneManagerPrepare.class);
+
   public void setup() throws Exception {
     cluster = getCluster();
     store = getObjectStore();
@@ -168,7 +173,6 @@ public class TestOzoneManagerPrepare extends TestOzoneManagerHA {
     }
   }
 
-  @Ignore("Flaky test tracked in HDDS-5109")
   @Test
   public void testPrepareWithRestart() throws Exception {
     setup();
@@ -370,18 +374,39 @@ public class TestOzoneManagerPrepare extends TestOzoneManagerHA {
     assertKeysWritten(volumeName, expectedKeys, cluster.getOzoneManagersList());
   }
 
+  /**
+   * Checks that all provided OMs have {@code expectedKeys} in the volume
+   * {@code volumeName} and retries checking until the test timeout.
+   * All provided OMs are checked, not just a majority, so that we can
+   * test that downed OMs are able to make a full recovery after preparation,
+   * even though the cluster could appear healthy with just 2 OMs.
+   */
   private void assertKeysWritten(String volumeName, Set<String> expectedKeys,
       List<OzoneManager> ozoneManagers) throws Exception {
     for (OzoneManager om: ozoneManagers) {
-      List<OmKeyInfo> keys = om.getMetadataManager().listKeys(volumeName,
-          BUCKET, null, KEY_PREFIX, 100);
-
-      Assert.assertEquals("Keys not found in " + om.getOMNodeId(),
-          expectedKeys.size(),
-          keys.size());
-      for (OmKeyInfo keyInfo: keys) {
-        Assert.assertTrue(expectedKeys.contains(keyInfo.getKeyName()));
-      }
+      // Wait for a potentially slow follower to apply all key writes.
+      LambdaTestUtils.await(WAIT_TIMEOUT_MILLIS, 1000, () -> {
+        List<OmKeyInfo> keys = om.getMetadataManager().listKeys(volumeName,
+            BUCKET, null, KEY_PREFIX, 100);
+
+        boolean allKeysFound = (expectedKeys.size() == keys.size());
+        if (!allKeysFound) {
+          LOG.info("In {} waiting for number of keys {} to equal " +
+              "expected number of keys {}.", om.getOMNodeId(),
+              keys.size(), expectedKeys.size());
+        } else {
+          for (OmKeyInfo keyInfo : keys) {
+            if (!expectedKeys.contains(keyInfo.getKeyName())) {
+              allKeysFound = false;
+              LOG.info("In {} expected keys did not contain key {}",
+                  om.getOMNodeId(), keyInfo.getKeyName());
+              break;
+            }
+          }
+        }
+
+        return allKeysFound;
+      });
     }
   }
 
@@ -395,11 +420,13 @@ public class TestOzoneManagerPrepare extends TestOzoneManagerHA {
     clientProtocol.getOzoneManagerClient().cancelOzoneManagerPrepare();
   }
 
-  private void assertClusterPrepared(long preparedIndex) throws Exception {
-    assertClusterPrepared(preparedIndex, cluster.getOzoneManagersList());
+  private void assertClusterPrepared(long expectedPreparedIndex)
+      throws Exception {
+    assertClusterPrepared(expectedPreparedIndex,
+        cluster.getOzoneManagersList());
   }
 
-  private void assertClusterPrepared(long preparedIndex,
+  private void assertClusterPrepared(long expectedPreparedIndex,
       List<OzoneManager> ozoneManagers) throws Exception {
 
     for (OzoneManager om : ozoneManagers) {
@@ -408,24 +435,17 @@ public class TestOzoneManagerPrepare extends TestOzoneManagerHA {
       LambdaTestUtils.await(WAIT_TIMEOUT_MILLIS,
           1000, () -> {
           if (!om.isRunning()) {
+            LOG.info("{} is not yet started.", om.getOMNodeId());
             return false;
           } else {
-            boolean preparedAtIndex = false;
             OzoneManagerPrepareState.State state =
                 om.getPrepareState().getState();
 
-            if (state.getStatus() == PrepareStatus.PREPARE_COMPLETED) {
-              if (state.getIndex() == preparedIndex) {
-                preparedAtIndex = true;
-              } else {
-                // State will not change if we are prepared at the wrong index.
-                // Break out of wait.
-                throw new Exception("OM " + om.getOMNodeId() + " prepared " +
-                    "but prepare index " + state.getIndex() + " does not " +
-                    "match expected prepare index " + preparedIndex);
-              }
-            }
-            return preparedAtIndex;
+            LOG.info("{} has prepare status: {} prepare index: {}.",
+                om.getOMNodeId(), state.getStatus(), state.getIndex());
+
+            return (state.getStatus() == PrepareStatus.PREPARE_COMPLETED) &&
+                (state.getIndex() >= expectedPreparedIndex);
           }
         });
     }
@@ -457,7 +477,7 @@ public class TestOzoneManagerPrepare extends TestOzoneManagerHA {
               return false;
             } else {
               return om.getPrepareState().getState().getStatus() ==
-                  PrepareStatus.PREPARE_NOT_STARTED;
+                  PrepareStatus.NOT_PREPARED;
             }
           });
     }
diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto
index 4899794..fc5b52f 100644
--- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto
+++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto
@@ -1105,8 +1105,8 @@ message PrepareStatusRequest {
 
 message PrepareStatusResponse {
     enum PrepareStatus {
-        PREPARE_NOT_STARTED = 1;
-        PREPARE_IN_PROGRESS = 2;
+        NOT_PREPARED = 1;
+        PREPARE_GATE_ENABLED = 2;
         PREPARE_COMPLETED = 3;
     }
     required PrepareStatus status = 1;
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index 162315c..37a75d2 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -581,11 +581,13 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
           updateLayoutVersionInDB(versionManager, metadataManager);
         }
       }
-    }
 
-    // Prepare state depends on the transaction ID of metadataManager after a
-    // restart.
-    instantiatePrepareState(withNewSnapshot);
+      instantiatePrepareStateAfterSnapshot();
+    } else {
+      // Prepare state depends on the transaction ID of metadataManager after a
+      // restart.
+      instantiatePrepareStateOnStartup();
+    }
 
     if (isAclEnabled) {
       accessAuthorizer = getACLAuthorizerInstance(configuration);
@@ -1385,10 +1387,16 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
     return omRatisSnapshotInfo;
   }
 
-  @VisibleForTesting
   public long getRatisSnapshotIndex() throws IOException {
-    return TransactionInfo.readTransactionInfo(metadataManager)
-        .getTransactionIndex();
+    TransactionInfo dbTxnInfo =
+        TransactionInfo.readTransactionInfo(metadataManager);
+    if (dbTxnInfo == null) {
+      // If there are no transactions in the database, it has applied index 0
+      // only.
+      return 0;
+    } else {
+      return dbTxnInfo.getTransactionIndex();
+    }
   }
 
   /**
@@ -3902,45 +3910,95 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
     return prepareState;
   }
 
-  private void instantiatePrepareState(boolean withNewSnapshot)
+  /**
+   * Determines if the prepare gate should be enabled on this OM after OM
+   * is restarted.
+   * This must be done after metadataManager is instantiated
+   * and before the RPC server is started.
+   */
+  private void instantiatePrepareStateOnStartup()
       throws IOException {
-    // If the prepare marker file is present and its index matches the last
-    // transaction index in the OM DB, turn on the in memory flag to
-    // put the ozone manager in prepare mode, disallowing write requests.
-    // This must be done after metadataManager is instantiated
-    // and before the RPC server is started.
     TransactionInfo txnInfo = metadataManager.getTransactionInfoTable()
         .get(TRANSACTION_INFO_KEY);
-    prepareState = new OzoneManagerPrepareState(configuration);
-
-    // If we have no transaction info in the DB, then no prepare request
-    // could have been received, since the request would update the txn
-    // index in the DB.
-    if (txnInfo != null) {
-      // Only puts OM in prepare mode if a marker file matching the txn index
-      // is found.
-      PrepareStatus status =
-          prepareState.restorePrepare(txnInfo.getTransactionIndex());
-      if (status == PrepareStatus.PREPARE_COMPLETED) {
-        LOG.info("Ozone Manager {} restarted in prepare mode.",
-            getOMNodeId());
-      } else if (withNewSnapshot) {
-        TransactionInfo prepareInfo =
-            metadataManager.getTransactionInfoTable().get(PREPARE_MARKER_KEY);
-        if (prepareInfo != null &&
-            prepareInfo.getTransactionIndex() ==
-                txnInfo.getTransactionIndex()) {
-          LOG.info("Prepare Index matches last applied index, but the prepare" +
-              " marker file is not found. A snapshot is being installed in " +
-              " this OM. Enabling prepare gate.");
-          prepareState.finishPrepare(prepareInfo.getTransactionIndex());
+    if (txnInfo == null) {
+      // No prepare request could be received if there are not transactions.
+      prepareState = new OzoneManagerPrepareState(configuration);
+    } else {
+      prepareState = new OzoneManagerPrepareState(configuration,
+          txnInfo.getTransactionIndex());
+      TransactionInfo dbPrepareValue =
+          metadataManager.getTransactionInfoTable().get(PREPARE_MARKER_KEY);
+
+      boolean hasMarkerFile =
+          (prepareState.getState().getStatus() ==
+              PrepareStatus.PREPARE_COMPLETED);
+      boolean hasDBMarker = (dbPrepareValue != null);
+
+      if (hasDBMarker) {
+        long dbPrepareIndex = dbPrepareValue.getTransactionIndex();
+
+        if (hasMarkerFile) {
+          long prepareFileIndex = prepareState.getState().getIndex();
+          // If marker and DB prepare index do not match, use the DB value
+          // since this is synced through Ratis, to avoid divergence.
+          if (prepareFileIndex != dbPrepareIndex) {
+            LOG.warn("Prepare marker file index {} does not match DB prepare " +
+                "index {}. Writing DB index to prepare file and maintaining " +
+                "prepared state.", prepareFileIndex, dbPrepareIndex);
+            prepareState.finishPrepare(dbPrepareIndex);
+          }
+          // Else, marker and DB are present and match, so OM is prepared.
         } else {
-          prepareState.cancelPrepare();
+          // Prepare cancelled with startup flag to remove marker file.
+          // Persist this to the DB.
+          // If the startup flag is used it should be used on all OMs to avoid
+          // divergence.
+          metadataManager.getTransactionInfoTable().delete(PREPARE_MARKER_KEY);
         }
+      } else if (hasMarkerFile) {
+        // Marker file present but no DB entry present.
+        // This should never happen. If a prepare request fails partway
+        // through, OM should replay it so both the DB and marker file exist.
+        throw new OMException("Prepare marker file found on startup without " +
+            "a corresponding database entry. Corrupt prepare state.",
+            ResultCodes.PREPARE_FAILED);
       }
+      // Else, no DB or marker file, OM is not prepared.
+    }
+  }
+
+  /**
+   * Determines if the prepare gate should be enabled on this OM after OM
+   * receives a snapshot.
+   */
+  private void instantiatePrepareStateAfterSnapshot()
+      throws IOException {
+    TransactionInfo txnInfo = metadataManager.getTransactionInfoTable()
+        .get(TRANSACTION_INFO_KEY);
+    if (txnInfo == null) {
+      // No prepare request could be received if there are not transactions.
+      prepareState = new OzoneManagerPrepareState(configuration);
     } else {
-      // Make sure OM prepare state is clean before proceeding.
-      prepareState.cancelPrepare();
+      prepareState = new OzoneManagerPrepareState(configuration,
+          txnInfo.getTransactionIndex());
+      TransactionInfo dbPrepareValue =
+          metadataManager.getTransactionInfoTable().get(PREPARE_MARKER_KEY);
+
+      boolean hasDBMarker = (dbPrepareValue != null);
+
+      if (hasDBMarker) {
+        // Snapshot contained a prepare request to apply.
+        // Update the in memory prepare gate and marker file index.
+        // If we have already done this, the operation is idempotent.
+        long dbPrepareIndex = dbPrepareValue.getTransactionIndex();
+        prepareState.restorePrepareFromIndex(dbPrepareIndex,
+            txnInfo.getTransactionIndex());
+      } else {
+        // No DB marker.
+        // Deletes marker file if exists, otherwise does nothing if we were not
+        // already prepared.
+        prepareState.cancelPrepare();
+      }
     }
   }
 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerPrepareState.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerPrepareState.java
index 43ef312..130ce4d 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerPrepareState.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerPrepareState.java
@@ -21,6 +21,7 @@ import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.server.ServerUtils;
 import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.ratis.OzoneManagerStateMachine;
 import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type;
@@ -53,16 +54,39 @@ public final class OzoneManagerPrepareState {
   private PrepareStatus status;
   private final ConfigurationSource conf;
 
+  /**
+   * Sets prepare state to {@link PrepareStatus#NOT_PREPARED},
+   * ignoring any marker file that may or may not exist.
+   */
   public OzoneManagerPrepareState(ConfigurationSource conf) {
-    prepareGateEnabled = false;
-    prepareIndex = NO_PREPARE_INDEX;
-    status = PrepareStatus.PREPARE_NOT_STARTED;
     this.conf = conf;
+    prepareIndex = NO_PREPARE_INDEX;
+    prepareGateEnabled = false;
+    status = PrepareStatus.NOT_PREPARED;
+  }
+
+  /**
+   * Restores prepare state from the marker file if it exists, otherwise sets
+   * prepare state to {@link PrepareStatus#NOT_PREPARED}.
+   *
+   * @param conf Configuration used to determine marker file location.
+   * @param currentIndex The OM's current log index to verify the prepare
+   *     index against. Prepare index should not be larger than the current
+   *     index.
+   * @throws IOException On error restoring prepare state from marker file.
+   */
+  public OzoneManagerPrepareState(ConfigurationSource conf, long currentIndex)
+      throws IOException {
+    this(conf);
+
+    if (getPrepareMarkerFile().exists()) {
+      restorePrepareFromFile(currentIndex);
+    }
   }
 
   /**
    * Turns on the prepare gate flag, clears the prepare index, and moves the
-   * prepare status to {@link PrepareStatus#PREPARE_IN_PROGRESS}.
+   * prepare status to {@link PrepareStatus#PREPARE_GATE_ENABLED}.
    *
    * Turning on the prepare gate flag will enable a gate in the
    * {@link OzoneManagerStateMachine#preAppendTransaction} (called on leader
@@ -73,24 +97,23 @@ public final class OzoneManagerPrepareState {
   public synchronized void enablePrepareGate() {
     prepareGateEnabled = true;
     prepareIndex = NO_PREPARE_INDEX;
-    status = PrepareStatus.PREPARE_IN_PROGRESS;
+    status = PrepareStatus.PREPARE_GATE_ENABLED;
   }
 
   /**
    * Removes the prepare marker file, clears the prepare index, turns off
    * the prepare gate, and moves the prepare status to
-   * {@link PrepareStatus#PREPARE_NOT_STARTED}.
+   * {@link PrepareStatus#NOT_PREPARED}.
    * This can be called from any state to clear the current prepare state.
    *
    * @throws IOException If the prepare marker file exists but cannot be
    * deleted.
    */
-  public synchronized void cancelPrepare()
-      throws IOException {
-    deletePrepareMarkerFile();
+  public synchronized void cancelPrepare() throws IOException {
     prepareIndex = NO_PREPARE_INDEX;
     prepareGateEnabled = false;
-    status = PrepareStatus.PREPARE_NOT_STARTED;
+    status = PrepareStatus.NOT_PREPARED;
+    deletePrepareMarkerFile();
   }
 
   /**
@@ -103,40 +126,57 @@ public final class OzoneManagerPrepareState {
    * @throws IOException If the marker file cannot be written.
    */
   public synchronized void finishPrepare(long index) throws IOException {
-    finishPrepare(index, true);
+    restorePrepareFromIndex(index, index, true);
+  }
+
+  /**
+   * Finishes preparation the same way as
+   * {@link OzoneManagerPrepareState#finishPrepare(long)}, but only if {@code
+   * currentIndex} is at least as large as {@code minIndex}. This is useful
+   * if the current log index needs to be checked against a prepare index
+   * saved to disk for validity.
+   */
+  public synchronized void restorePrepareFromIndex(long restoredPrepareIndex,
+      long currentIndex) throws IOException {
+    restorePrepareFromIndex(restoredPrepareIndex, currentIndex, true);
   }
 
-  private void finishPrepare(long index, boolean writeFile) throws IOException {
-    // Enabling the prepare gate is idempotent, and may have already been
-    // performed if we are the leader. If we are a follower, we must ensure this
-    // is run now case we become the leader.
-    enablePrepareGate();
+  private void restorePrepareFromIndex(long restoredPrepareIndex,
+      long currentIndex, boolean writeFile) throws IOException {
+    if (restoredPrepareIndex <= currentIndex) {
+      // Enabling the prepare gate is idempotent, and may have already been
+      // performed if we are the leader. If we are a follower, we must ensure
+      // this is run now in case we become the leader.
+      enablePrepareGate();
 
-    if (writeFile) {
-      writePrepareMarkerFile(index);
+      if (writeFile) {
+        writePrepareMarkerFile(restoredPrepareIndex);
+      }
+      prepareIndex = currentIndex;
+      status = PrepareStatus.PREPARE_COMPLETED;
+    } else {
+      throwPrepareException("Failed to restore OM prepare " +
+              "state, because the existing prepare index %d is larger than" +
+              "the current index %d.", restoredPrepareIndex, currentIndex);
     }
-    prepareIndex = index;
-    status = PrepareStatus.PREPARE_COMPLETED;
   }
 
   /**
    * Uses the on disk marker file to determine the OM's prepare state.
    * If the marker file exists and contains an index matching {@code
-   * expectedPrepareIndex}, the necessary steps will be taken to finish
+   * currentIndex}, the necessary steps will be taken to finish
    * preparation and the state will be moved to
    * {@link PrepareStatus#PREPARE_COMPLETED}.
    * Else, the status will be moved to
-   * {@link PrepareStatus#PREPARE_NOT_STARTED} and any preparation steps will
+   * {@link PrepareStatus#NOT_PREPARED} and any preparation steps will
    * be cancelled.
    *
-   * @return The status the OM is in after this method call.
    * @throws IOException If the marker file cannot be read, and it cannot be
    * deleted as part of moving to the
-   * {@link PrepareStatus#PREPARE_NOT_STARTED} state.
+   * {@link PrepareStatus#NOT_PREPARED} state.
    */
-  public synchronized PrepareStatus restorePrepare(long expectedPrepareIndex)
+  public synchronized void restorePrepareFromFile(long currentIndex)
       throws IOException {
-    boolean prepareIndexRead = true;
     long prepareMarkerIndex = NO_PREPARE_INDEX;
 
     File prepareMarkerFile = getPrepareMarkerFile();
@@ -145,48 +185,37 @@ public final class OzoneManagerPrepareState {
       try(FileInputStream stream = new FileInputStream(prepareMarkerFile)) {
         stream.read(data);
       } catch (IOException e) {
-        LOG.error("Failed to read prepare marker file {} while restoring OM.",
-            prepareMarkerFile.getAbsolutePath());
-        prepareIndexRead = false;
+        throwPrepareException(e, "Failed to read prepare marker " +
+            "file %s while restoring OM.", prepareMarkerFile.getAbsolutePath());
       }
 
       try {
         prepareMarkerIndex = Long.parseLong(
             new String(data, StandardCharsets.UTF_8));
       } catch (NumberFormatException e) {
-        LOG.error("Failed to parse log index from prepare marker file {} " +
-            "while restoring OM.", prepareMarkerFile.getAbsolutePath());
-        prepareIndexRead = false;
+        throwPrepareException("Failed to parse log index from " +
+            "prepare marker file %s while restoring OM.",
+            prepareMarkerFile.getAbsolutePath());
       }
     } else {
       // No marker file found.
-      prepareIndexRead = false;
+      throwPrepareException("Unable to find prepare marker file to restore" +
+          " from. Expected %s: ", prepareMarkerFile.getAbsolutePath());
     }
 
-    boolean prepareRestored = false;
-    if (prepareIndexRead) {
-      if (prepareMarkerIndex != expectedPrepareIndex) {
-        LOG.error("Failed to restore OM prepare state, because the expected " +
-            "prepare index {} does not match the index {} written to the " +
-            "marker file.", expectedPrepareIndex, prepareMarkerIndex);
-      } else {
-        // Prepare state can only be restored if we read the expected index
-        // from the marker file.
-        prepareRestored = true;
-      }
-    }
+    restorePrepareFromIndex(prepareMarkerIndex, currentIndex, false);
+  }
 
-    if (prepareRestored) {
-      // Do not rewrite the marker file, since we verified it already exists.
-      finishPrepare(prepareMarkerIndex, false);
-    } else {
-      // If the potentially faulty marker file cannot be deleted,
-      // propagate the IOException.
-      // If there is no marker file, this call sets the in memory state only.
-      cancelPrepare();
-    }
+  private void throwPrepareException(Throwable cause, String format,
+      Object... args) throws OMException {
+    throw new OMException(String.format(format, args), cause,
+        OMException.ResultCodes.PREPARE_FAILED);
+  }
 
-    return status;
+  private void throwPrepareException(String format,
+      Object... args) throws OMException {
+    throw new OMException(String.format(format, args),
+        OMException.ResultCodes.PREPARE_FAILED);
   }
 
   /**
@@ -241,7 +270,7 @@ public final class OzoneManagerPrepareState {
       Files.delete(markerFile.toPath());
       LOG.info("Deleted prepare marker file: {}", markerFile.getAbsolutePath());
     } else {
-      LOG.info("Request to delete prepare marker file that does not exist: {}",
+      LOG.debug("Request to delete prepare marker file that does not exist: {}",
           markerFile.getAbsolutePath());
     }
   }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMPrepareRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMPrepareRequest.java
index 334cc6a..a7484c1 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMPrepareRequest.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/upgrade/OMPrepareRequest.java
@@ -17,8 +17,6 @@
 
 package org.apache.hadoop.ozone.om.request.upgrade;
 
-import org.apache.hadoop.hdds.utils.TransactionInfo;
-import org.apache.hadoop.ozone.om.OMMetadataManager;
 import org.apache.hadoop.ozone.om.OzoneManager;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer;
@@ -32,7 +30,6 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Prepare
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse;
 
-import static org.apache.hadoop.ozone.OzoneConsts.TRANSACTION_INFO_KEY;
 import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type;
 
 import org.apache.ratis.server.RaftServer;
@@ -87,14 +84,12 @@ public class OMPrepareRequest extends OMClientRequest {
 
     try {
       // Create response.
-      // DB snapshot for prepare will include the transaction to commit it,
-      // making the prepare index one more than this txn's log index.
-      long prepareIndex = transactionLogIndex + 1;
       PrepareResponse omResponse = PrepareResponse.newBuilder()
-              .setTxnID(prepareIndex)
+              .setTxnID(transactionLogIndex)
               .build();
       responseBuilder.setPrepareResponse(omResponse);
-      response = new OMPrepareResponse(responseBuilder.build(), prepareIndex);
+      response = new OMPrepareResponse(responseBuilder.build(),
+          transactionLogIndex);
 
       // Add response to double buffer before clearing logs.
       // This guarantees the log index of this request will be the same as
@@ -112,24 +107,18 @@ public class OMPrepareRequest extends OMClientRequest {
       // Since the response for this request was added to the double buffer
       // already, once this index reaches the state machine, we know all
       // transactions have been flushed.
-      waitForLogIndex(transactionLogIndex,
-          ozoneManager.getMetadataManager(), division,
+      waitForLogIndex(transactionLogIndex, ozoneManager, division,
           flushTimeout, flushCheckInterval);
+      takeSnapshotAndPurgeLogs(transactionLogIndex, division);
 
-      long snapshotIndex = takeSnapshotAndPurgeLogs(division);
-      if (snapshotIndex != prepareIndex) {
-        LOG.warn("Snapshot index {} does not " +
-            "match expected prepare index {}.", snapshotIndex, prepareIndex);
-      }
-
-      // Save transaction log index to a marker file, so if the OM restarts,
-      // it will remain in prepare mode on that index as long as the file
-      // exists.
-      ozoneManager.getPrepareState().finishPrepare(prepareIndex);
+      // Save prepare index to a marker file, so if the OM restarts,
+      // it will remain in prepare mode as long as the file exists and its
+      // log indices are >= the one in the file.
+      ozoneManager.getPrepareState().finishPrepare(transactionLogIndex);
 
       LOG.info("OM {} prepared at log index {}. Returning response {} with " +
-          "log index {}", ozoneManager.getOMNodeId(), prepareIndex, omResponse,
-          omResponse.getTxnID());
+          "log index {}", ozoneManager.getOMNodeId(), transactionLogIndex,
+          omResponse, omResponse.getTxnID());
     } catch (OMException e) {
       LOG.error("Prepare Request Apply failed in {}. ",
           ozoneManager.getOMNodeId(), e);
@@ -143,6 +132,15 @@ public class OMPrepareRequest extends OMClientRequest {
       response = new OMPrepareResponse(
           createErrorOMResponse(responseBuilder, new OMException(e,
               OMException.ResultCodes.PREPARE_FAILED)));
+
+      // Disable prepare gate and attempt to delete prepare marker file.
+      // Whether marker file delete fails or succeeds, we will return the
+      // above error response to the caller.
+      try {
+        ozoneManager.getPrepareState().cancelPrepare();
+      } catch (IOException ex) {
+        LOG.error("Failed to delete prepare marker file.", ex);
+      }
     }
 
     return response;
@@ -150,40 +148,47 @@ public class OMPrepareRequest extends OMClientRequest {
 
   /**
    * Waits for the specified index to be flushed to the state machine on
-   * disk, and to be updated in memory in Ratis.
+   * disk, and to be applied to Ratis's state machine.
    */
-  private static void waitForLogIndex(long indexToWaitFor,
-      OMMetadataManager metadataManager, RaftServer.Division division,
+  private static void waitForLogIndex(long minOMDBFlushIndex,
+      OzoneManager om, RaftServer.Division division,
       Duration flushTimeout, Duration flushCheckInterval)
       throws InterruptedException, IOException {
 
     long endTime = System.currentTimeMillis() + flushTimeout.toMillis();
-    boolean success = false;
-
-    while (!success && System.currentTimeMillis() < endTime) {
-      // If no transactions have been persisted to the DB, transaction info
-      // will be null, not zero, causing a null pointer exception within
-      // ozoneManager#getRatisSnaphotIndex.
-      // Get the transaction directly instead to handle the case when it is
-      // null.
-      TransactionInfo dbTxnInfo = metadataManager
-          .getTransactionInfoTable().get(TRANSACTION_INFO_KEY);
-      long ratisTxnIndex =
-          division.getStateMachine().getLastAppliedTermIndex().getIndex();
 
-      // Ratis may apply meta transactions after the prepare request, causing
-      // its in memory index to always be greater than the DB index.
-      if (dbTxnInfo == null) {
-        // If there are no transactions in the DB, we are prepared to log
-        // index 0 only.
-        success = (indexToWaitFor == 0)
-            && (ratisTxnIndex == indexToWaitFor + 1);
-      } else {
-        success = (dbTxnInfo.getTransactionIndex() == indexToWaitFor)
-            && (ratisTxnIndex >= indexToWaitFor + 1);
-      }
+    boolean omDBFlushed = false;
+    boolean ratisStateMachineApplied = false;
+
+    // Wait for Ratis commit index after the specified index to be applied to
+    // Ratis' state machine. This index will not appear in the OM DB until a
+    // snapshot is taken.
+    // If we purge logs without waiting for this index, it may not make it to
+    // the RocksDB snapshot, and then the log entry is lost on this OM.
+    long minRatisStateMachineIndex = minOMDBFlushIndex + 1;
+    long lastRatisCommitIndex = RaftLog.INVALID_LOG_INDEX;
+    long lastOMDBFlushIndex = RaftLog.INVALID_LOG_INDEX;
+
+    LOG.info("{} waiting for index {} to flush to OM DB and index {} to flush" +
+            " to Ratis state machine.", om.getOMNodeId(), minOMDBFlushIndex,
+        minRatisStateMachineIndex);
+    while (!(omDBFlushed && ratisStateMachineApplied) &&
+        System.currentTimeMillis() < endTime) {
+      // Check OM DB.
+      lastOMDBFlushIndex = om.getRatisSnapshotIndex();
+      omDBFlushed = (lastOMDBFlushIndex >= minOMDBFlushIndex);
+      LOG.debug("{} Current DB transaction index {}.", om.getOMNodeId(),
+          lastOMDBFlushIndex);
+
+      // Check ratis state machine.
+      lastRatisCommitIndex =
+          division.getStateMachine().getLastAppliedTermIndex().getIndex();
+      ratisStateMachineApplied = (lastRatisCommitIndex >=
+          minRatisStateMachineIndex);
+      LOG.debug("{} Current Ratis state machine transaction index {}.",
+          om.getOMNodeId(), lastRatisCommitIndex);
 
-      if (!success) {
+      if (!(omDBFlushed && ratisStateMachineApplied)) {
         Thread.sleep(flushCheckInterval.toMillis());
       }
     }
@@ -191,52 +196,63 @@ public class OMPrepareRequest extends OMClientRequest {
     // If the timeout waiting for all transactions to reach the state machine
     // is exceeded, the exception is propagated, resulting in an error response
     // to the client. They can retry the prepare request.
-    if (!success) {
+    if (!omDBFlushed) {
       throw new IOException(String.format("After waiting for %d seconds, " +
-              "State Machine has not applied  all the transactions.",
-          flushTimeout.getSeconds()));
+              "OM database flushed index %d which is less than the minimum " +
+              "required index %d.",
+          flushTimeout.getSeconds(), lastOMDBFlushIndex, minOMDBFlushIndex));
+    } else if (!ratisStateMachineApplied) {
+      throw new IOException(String.format("After waiting for %d seconds, " +
+              "Ratis state machine applied index %d which is less than" +
+              " the minimum required index %d.",
+          flushTimeout.getSeconds(), lastRatisCommitIndex,
+          minRatisStateMachineIndex));
     }
   }
 
   /**
-   * Take a snapshot of the state machine at the last index, and purge ALL logs.
-   * @param division Raft server division.
-   * @return The index the snapshot was taken on.
-   * @throws IOException on Error.
+   * Take a snapshot of the state machine at the last index, and purge at
+   * least all log with indices less than or equal to the prepare index.
+   * If there is another prepare request or cancel prepare request,
+   * this one will end up purging that request since it was allowed through
+   * the pre-append prepare gate.
+   * This means that an OM cannot support 2 prepare requests in the
+   * transaction pipeline (un-applied) at the same time.
    */
-  public static long takeSnapshotAndPurgeLogs(RaftServer.Division division)
-      throws IOException {
-
+  public static void takeSnapshotAndPurgeLogs(long prepareIndex,
+      RaftServer.Division division) throws IOException {
     StateMachine stateMachine = division.getStateMachine();
     long snapshotIndex = stateMachine.takeSnapshot();
-    RaftLog raftLog = division.getRaftLog();
-    long raftLogIndex = raftLog.getLastEntryTermIndex().getIndex();
-
-    // We can have a case where the log has a meta transaction after the
-    // prepare request or another prepare request. If there is another
-    // prepare request, this one will end up purging that request.
-    // This means that an OM cannot support 2 prepare requests in the
-    // transaction pipeline (un-applied) at the same time.
-    if (raftLogIndex > snapshotIndex) {
-      LOG.warn("Snapshot index {} does not " +
-          "match last log index {}.", snapshotIndex, raftLogIndex);
-      snapshotIndex = raftLogIndex;
+
+    if (snapshotIndex < prepareIndex) {
+      throw new IOException(String.format("OM DB snapshot index %d is less " +
+          "than prepare index %d. Some required logs may not have" +
+          "been persisted to the state machine.", snapshotIndex,
+          prepareIndex));
     }
 
     CompletableFuture<Long> purgeFuture =
-        raftLog.onSnapshotInstalled(snapshotIndex);
+        division.getRaftLog().onSnapshotInstalled(snapshotIndex);
 
     try {
-      Long purgeIndex = purgeFuture.get();
-      if (purgeIndex != snapshotIndex) {
-        throw new IOException("Purge index " + purgeIndex +
-            " does not match last index " + snapshotIndex);
+      long actualPurgeIndex = purgeFuture.get();
+
+      if (actualPurgeIndex != snapshotIndex) {
+        LOG.warn("Actual purge index {} does not " +
+              "match specified purge index {}. ", actualPurgeIndex,
+            snapshotIndex);
+      }
+
+      if (actualPurgeIndex < prepareIndex) {
+        throw new IOException(String.format("Actual purge index %d is less " +
+          "than prepare index %d. Some required logs may not have" +
+            " been removed.", actualPurgeIndex, prepareIndex));
       }
     } catch (Exception e) {
-      throw new IOException("Unable to purge logs.", e);
+      // Ozone manager error handler does not respect exception chaining and
+      // only displays the message of the top level exception.
+      throw new IOException("Unable to purge logs: " + e.getMessage());
     }
-
-    return snapshotIndex;
   }
 
   public static String getRequestType() {
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java
index 7c0ece0..b73bbc5 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java
@@ -271,15 +271,15 @@ public class TestOzoneManagerStateMachine {
         .setClientId("123")
         .build();
 
+    // Without prepare enabled, the txn should be returned unaltered.
     TransactionContext submittedTrx = mockTransactionContext(createKeyRequest);
     TransactionContext returnedTrx =
         ozoneManagerStateMachine.preAppendTransaction(submittedTrx);
-
-    // No prepare should be triggered, and the txn should be returned unaltered.
-    Assert.assertEquals(prepareState.getState().getStatus(),
-        PrepareStatus.PREPARE_NOT_STARTED);
     Assert.assertSame(submittedTrx, returnedTrx);
 
+    Assert.assertEquals(PrepareStatus.NOT_PREPARED,
+        prepareState.getState().getStatus());
+
     // Submit prepare request.
     OMRequest prepareRequest = OMRequest.newBuilder()
         .setPrepareRequest(
@@ -291,12 +291,12 @@ public class TestOzoneManagerStateMachine {
 
     submittedTrx = mockTransactionContext(prepareRequest);
     returnedTrx = ozoneManagerStateMachine.preAppendTransaction(submittedTrx);
-
-    // Prepare should be started, and txn should be returned unaltered.
-    Assert.assertEquals(prepareState.getState().getStatus(),
-        PrepareStatus.PREPARE_IN_PROGRESS);
     Assert.assertSame(submittedTrx, returnedTrx);
 
+    // Prepare should be started.
+    Assert.assertEquals(PrepareStatus.PREPARE_GATE_ENABLED,
+        prepareState.getState().getStatus());
+
     // Submitting a write request should now fail.
     try {
       ozoneManagerStateMachine.preAppendTransaction(
@@ -313,11 +313,15 @@ public class TestOzoneManagerStateMachine {
     }
 
     // Should be able to prepare again without issue.
-    Assert.assertEquals(prepareState.getState().getStatus(),
-        PrepareStatus.PREPARE_IN_PROGRESS);
+    submittedTrx = mockTransactionContext(prepareRequest);
+    returnedTrx = ozoneManagerStateMachine.preAppendTransaction(submittedTrx);
     Assert.assertSame(submittedTrx, returnedTrx);
 
-    // TODO: Add test for cancel prepare once it is implemented.
+    Assert.assertEquals(PrepareStatus.PREPARE_GATE_ENABLED,
+        prepareState.getState().getStatus());
+
+    // Cancel prepare is handled in the cancel request apply txn step, not
+    // the pre-append state machine step, so it is tested in other classes.
   }
 
   private TransactionContext mockTransactionContext(OMRequest request) {
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/upgrade/TestOMCancelPrepareRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/upgrade/TestOMCancelPrepareRequest.java
index 9fcc1c9..9cdbef3 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/upgrade/TestOMCancelPrepareRequest.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/upgrade/TestOMCancelPrepareRequest.java
@@ -63,7 +63,7 @@ public class TestOMCancelPrepareRequest extends TestOMKeyRequest {
     OzoneManagerPrepareState prepareState = ozoneManager.getPrepareState();
     OzoneManagerPrepareState.State state = prepareState.getState();
 
-    Assert.assertEquals(state.getStatus(), PrepareStatus.PREPARE_NOT_STARTED);
+    Assert.assertEquals(state.getStatus(), PrepareStatus.NOT_PREPARED);
     Assert.assertEquals(state.getIndex(),
         OzoneManagerPrepareState.NO_PREPARE_INDEX);
     Assert.assertFalse(prepareState.getPrepareMarkerFile().exists());
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOzoneManagerPrepareState.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOzoneManagerPrepareState.java
index d9307ee..bd58493 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOzoneManagerPrepareState.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/upgrade/TestOzoneManagerPrepareState.java
@@ -18,10 +18,12 @@
 package org.apache.hadoop.ozone.om.upgrade;
 
 import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrepareStatusResponse.PrepareStatus;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.ozone.om.OzoneManagerPrepareState;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -115,19 +117,19 @@ public class TestOzoneManagerPrepareState {
   @Test
   public void testRestoreCorrectIndex() throws Exception {
     writePrepareMarkerFile(TEST_INDEX);
-    PrepareStatus status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_COMPLETED, status);
-
+    prepareState.restorePrepareFromFile(TEST_INDEX);
     assertPrepareCompleted(TEST_INDEX);
   }
 
   @Test
   public void testRestoreIncorrectIndex() throws Exception {
     writePrepareMarkerFile(TEST_INDEX);
-    PrepareStatus status = prepareState.restorePrepare(TEST_INDEX + 1);
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, status);
-
-    assertPrepareNotStarted();
+    // Ratis is allowed to apply transactions after prepare, like commit and
+    // conf entries, but OM write requests are not allowed.
+    // Therefore prepare restoration should only fail if the marker file index
+    // is less than the OM's txn index.
+    assertPrepareFailedException(() ->
+        prepareState.restorePrepareFromFile(TEST_INDEX - 1));
   }
 
   @Test
@@ -136,61 +138,54 @@ public class TestOzoneManagerPrepareState {
     RANDOM.nextBytes(randomBytes);
     writePrepareMarkerFile(randomBytes);
 
-    PrepareStatus status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, status);
-
-    assertPrepareNotStarted();
+    assertPrepareFailedException(() ->
+        prepareState.restorePrepareFromFile(TEST_INDEX));
   }
 
   @Test
   public void testRestoreEmptyMarkerFile() throws Exception {
     writePrepareMarkerFile(new byte[]{});
 
-    PrepareStatus status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, status);
-
-    assertPrepareNotStarted();
+    assertPrepareFailedException(() ->
+        prepareState.restorePrepareFromFile(TEST_INDEX));
   }
 
   @Test
   public void testRestoreNoMarkerFile() throws Exception {
-    PrepareStatus status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, status);
-
+    assertPrepareFailedException(() ->
+        prepareState.restorePrepareFromFile(TEST_INDEX));
     assertPrepareNotStarted();
   }
 
   @Test
-  public void testRestoreAfterStart() throws Exception {
+  public void testRestoreWithGateOnly() throws Exception {
     prepareState.enablePrepareGate();
 
-    // If prepare is started but never finished, no marker file is written to
-    // disk, and restoring the prepare state on an OM restart should leave it
-    // not prepared.
-    PrepareStatus status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, status);
-    assertPrepareNotStarted();
+    // If prepare is started but never finished, (gate is up but no marker
+    // file written), then restoring the prepare state from the file should
+    // fail, but leave the in memory state the same.
+    assertPrepareFailedException(() ->
+        prepareState.restorePrepareFromFile(TEST_INDEX));
+    assertPrepareInProgress();
   }
 
   @Test
   public void testMultipleRestores() throws Exception {
-    PrepareStatus status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, status);
+    assertPrepareFailedException(() ->
+        prepareState.restorePrepareFromFile(TEST_INDEX));
     assertPrepareNotStarted();
 
-    status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, status);
+    assertPrepareFailedException(() ->
+        prepareState.restorePrepareFromFile(TEST_INDEX));
     assertPrepareNotStarted();
 
     prepareState.enablePrepareGate();
     prepareState.finishPrepare(TEST_INDEX);
 
-    status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_COMPLETED, status);
+    prepareState.restorePrepareFromFile(TEST_INDEX);
     assertPrepareCompleted(TEST_INDEX);
 
-    status = prepareState.restorePrepare(TEST_INDEX);
-    Assert.assertEquals(PrepareStatus.PREPARE_COMPLETED, status);
+    prepareState.restorePrepareFromFile(TEST_INDEX);
     assertPrepareCompleted(TEST_INDEX);
   }
 
@@ -226,7 +221,7 @@ public class TestOzoneManagerPrepareState {
 
   private void assertPrepareNotStarted() {
     OzoneManagerPrepareState.State state = prepareState.getState();
-    Assert.assertEquals(PrepareStatus.PREPARE_NOT_STARTED, state.getStatus());
+    Assert.assertEquals(PrepareStatus.NOT_PREPARED, state.getStatus());
     Assert.assertEquals(OzoneManagerPrepareState.NO_PREPARE_INDEX,
         state.getIndex());
     Assert.assertFalse(prepareState.getPrepareMarkerFile().exists());
@@ -236,7 +231,7 @@ public class TestOzoneManagerPrepareState {
 
   private void assertPrepareInProgress() {
     OzoneManagerPrepareState.State state = prepareState.getState();
-    Assert.assertEquals(PrepareStatus.PREPARE_IN_PROGRESS, state.getStatus());
+    Assert.assertEquals(PrepareStatus.PREPARE_GATE_ENABLED, state.getStatus());
     Assert.assertEquals(OzoneManagerPrepareState.NO_PREPARE_INDEX,
         state.getIndex());
     Assert.assertFalse(prepareState.getPrepareMarkerFile().exists());
@@ -271,4 +266,15 @@ public class TestOzoneManagerPrepareState {
       Assert.assertTrue(prepareState.requestAllowed(cmdType));
     }
   }
+
+  private void assertPrepareFailedException(LambdaTestUtils.VoidCallable call)
+      throws Exception {
+    try {
+      call.call();
+    } catch (OMException ex) {
+      if (ex.getResult() != OMException.ResultCodes.PREPARE_FAILED) {
+        throw ex;
+      }
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org