You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by du...@apache.org on 2023/04/13 15:21:38 UTC

[ozone] branch master updated: HDDS-8345. [Snapshot] Remove snapshot from SnapshotChainManager in case of failure (#4525)

This is an automated email from the ASF dual-hosted git repository.

duong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 8515c13a1e HDDS-8345. [Snapshot] Remove snapshot from SnapshotChainManager in case of failure (#4525)
8515c13a1e is described below

commit 8515c13a1eda56d16b531b67246e99b932c52318
Author: Hemant Kumar <he...@gmail.com>
AuthorDate: Thu Apr 13 08:21:31 2023 -0700

    HDDS-8345. [Snapshot] Remove snapshot from SnapshotChainManager in case of failure (#4525)
---
 .../ozone/rocksdiff/RocksDBCheckpointDiffer.java   |  2 +-
 .../hadoop/ozone/om/SnapshotChainManager.java      |  2 +-
 .../request/snapshot/OMSnapshotCreateRequest.java  | 39 ++++++++++++++++++++--
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java
index 11ef743a12..8c69770080 100644
--- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java
+++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java
@@ -1062,7 +1062,7 @@ public class RocksDBCheckpointDiffer implements AutoCloseable {
     } catch (RocksDBException e) {
       LOG.warn("Can't get num of keys in SST '{}': {}", file, e.getMessage());
     } catch (FileNotFoundException e) {
-      LOG.info("Can't find SST '{}'", file, e);
+      LOG.info("Can't find SST '{}'", file);
     }
     CompactionNode fileNode = new CompactionNode(
         file, snapshotID, numKeys, seqNum);
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java
index e57b7c9e9d..8dc6c703e0 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java
@@ -84,7 +84,7 @@ public class SnapshotChainManager {
     if (prevGlobalID != null &&
         !snapshotChainGlobal.containsKey(prevGlobalID)) {
       throw new IOException("Snapshot Chain corruption: "
-          + " previous snapshotID given but no associated snapshot "
+          + "previous snapshotID given but no associated snapshot "
           + "found in snapshot chain: SnapshotID "
           + prevGlobalID);
     }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
index 6855ff476a..fb082bbe3e 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
@@ -48,13 +48,13 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.Objects;
 import java.util.UUID;
 
 import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_ALREADY_EXISTS;
 import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.BUCKET_LOCK;
 import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;
 
-
 /**
  * Handles CreateSnapshot Request.
  */
@@ -185,6 +185,21 @@ public class OMSnapshotCreateRequest extends OMClientRequest {
       omClientResponse = new OMSnapshotCreateResponse(
           omResponse.build(), snapshotInfo);
     } catch (IOException ex) {
+      // Remove snapshot from the SnapshotChainManager in case of any failure.
+      // A createSnapshot request can fail after the snapshot has been added
+      // to the snapshot chain manager because it couldn't be added to cache/DB.
+      // In that scenario, SnapshotChainManager#globalSnapshotId will point to
+      // the failed createSnapshot request's snapshotId, which does not
+      // actually exist in the SnapshotInfo table.
+      // If it doesn't get removed, OM restart will crash on
+      // SnapshotChainManager#loadFromSnapshotInfoTable because it could not
+      // find the previous snapshot which doesn't exist because it was never
+      // added to the SnapshotInfo table.
+      if (Objects.equals(snapshotInfo.getSnapshotID(),
+          snapshotChainManager.getLatestGlobalSnapshot())) {
+        removeSnapshotInfoFromSnapshotChainManager(snapshotChainManager,
+            snapshotInfo);
+      }
       exception = ex;
       omClientResponse = new OMSnapshotCreateResponse(
           createErrorOMResponse(omResponse, exception));
@@ -215,5 +230,25 @@ public class OMSnapshotCreateRequest extends OMClientRequest {
     }
     return omClientResponse;
   }
-  
+
+  /**
+   * Removes the given snapshot from the SnapshotChainManager.
+   * An IOException thrown by the deletion is logged as an error and
+   * swallowed. Ideally, deletion should never fail; if it does, rethrowing
+   * would lose track of why the snapshot creation itself failed.
+   *
+   * @param snapshotChainManager chain manager to remove the snapshot from
+   * @param info snapshot to remove from the chain
+   */
+  private void removeSnapshotInfoFromSnapshotChainManager(
+      SnapshotChainManager snapshotChainManager,
+      SnapshotInfo info
+  ) {
+    try {
+      snapshotChainManager.deleteSnapshot(info);
+    } catch (IOException exception) {
+      LOG.error("Failed to remove snapshot: {} from SnapshotChainManager.",
+          info, exception);
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org