Posted to common-issues@hadoop.apache.org by GitBox <gi...@apache.org> on 2020/07/29 07:14:32 UTC

[GitHub] [hadoop] szetszwo commented on a change in pull request #2165: HDFS-15481. Ordered snapshot deletion: garbage collect deleted snapshots

szetszwo commented on a change in pull request #2165:
URL: https://github.com/apache/hadoop/pull/2165#discussion_r461248171



##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDeletionGc.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS;
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT;
+
+public class SnapshotDeletionGc {
+  public static final Logger LOG = LoggerFactory.getLogger(
+      SnapshotDeletionGc.class);
+
+  private final FSNamesystem namesystem;
+  private final int deletionOrderedGcPeriodMs;
+  private final AtomicReference<Timer> timer = new AtomicReference<>();
+
+  public SnapshotDeletionGc(FSNamesystem namesystem, Configuration conf) {
+    this.namesystem = namesystem;
+
+    this.deletionOrderedGcPeriodMs = conf.getInt(
+        DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+        DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT);
+    LOG.info("{} = {}", DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+        deletionOrderedGcPeriodMs);
+  }
+
+  public void schedule() {
+    if (timer.get() != null) {
+      return;
+    }
+    final Timer t = new Timer(getClass().getSimpleName(), true);
+    if (timer.compareAndSet(null, t)) {
+      LOG.info("Schedule at fixed rate {}",
+          StringUtils.formatTime(deletionOrderedGcPeriodMs));
+      t.scheduleAtFixedRate(new GcTask(),
+          deletionOrderedGcPeriodMs, deletionOrderedGcPeriodMs);
+    }
+  }
+
+  public void cancel() {
+    final Timer t = timer.getAndSet(null);
+    if (t != null) {
+      LOG.info("cancel");
+      t.cancel();
+    }
+  }
+
+  private void gcDeletedSnapshot(String name) {
+    final Snapshot.Root deleted;
+    namesystem.readLock();

Review comment:
       This is a good idea.  Let's add the assertion to all the cases when the SnapshotDeletionOrdered feature is enabled.
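
    Purely as an illustration of the feature-gated assertion pattern being discussed (the actual invariant asserted in the PR is not shown here, and the helper name and parameters below are assumptions), a minimal sketch could look like:

        // Hedged sketch, not the PR's code: only enforce the invariant when
        // the ordered snapshot deletion feature is enabled.
        final class OrderedSnapshotAssertions {
          private OrderedSnapshotAssertions() { }

          static void assertMarkedAsDeleted(boolean deletionOrderedEnabled,
              boolean markedAsDeleted, String snapshotPath) {
            if (deletionOrderedEnabled && !markedAsDeleted) {
              throw new IllegalStateException(
                  "Snapshot " + snapshotPath + " is not marked as deleted");
            }
          }
        }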

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -629,4 +641,36 @@ public void shutdown() {
         s.getRoot().getLocalName(), s.getRoot().getFullPathName(),
         s.getRoot().getModificationTime());
   }
-}
+
+  Snapshot.Root chooseDeletedSnapshot() {
+    final List<INodeDirectory> dirs = getSnapshottableDirs();
+    Collections.shuffle(dirs);

Review comment:
       Sure.

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
##########
@@ -1284,6 +1288,7 @@ void startCommonServices(Configuration conf, HAContext haContext) throws IOExcep
       dir.setINodeAttributeProvider(inodeAttributeProvider);
     }
     snapshotManager.registerMXBean();
+    snapshotDeletionGc.schedule();

Review comment:
       Sure.  Thanks for the suggestion.

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -382,6 +395,14 @@ public void deleteSnapshot(final INodesInPath iip, final String snapshotName,
             EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
         return;
       }
+
+      // assert if it is deleting the first snapshot
+      final INodeDirectoryAttributes first = snapshottable.getDiffs().getFirstSnapshotINode();
+      if (snapshot.getRoot() != first) {
+        throw new IllegalStateException("Failed to delete snapshot " + snapshotName

Review comment:
       No.  gcDeletedSnapshot will end up here.

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
##########
@@ -7166,6 +7175,29 @@ void deleteSnapshot(String snapshotRoot, String snapshotName,
     logAuditEvent(true, operationName, rootPath, null, null);
   }
 
+  public void gcDeletedSnapshot(String snapshotRoot, String snapshotName)
+      throws IOException {
+    final String operationName = "gcDeletedSnapshot";
+    String rootPath = null;
+    final INode.BlocksMapUpdateInfo blocksToBeDeleted;
+
+    checkOperation(OperationCategory.WRITE);
+    writeLock();
+    try {
+      checkOperation(OperationCategory.WRITE);
+      rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
+      checkNameNodeSafeMode("Cannot gcDeletedSnapshot for " + rootPath);
+
+      final long now = Time.now();

Review comment:
       Let's check if the snapshot isMarkedAsDeleted.  The "first snapshot" check is already in the code.
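
    A minimal, standalone sketch of the kind of check being suggested; the isMarkedAsDeleted state comes from the review discussion, while the helper name and signature below are assumptions rather than the PR's actual API:

        import java.io.IOException;

        // Hedged sketch: validate the GC candidate under the write lock
        // before actually deleting it.
        final class GcCandidateCheck {
          private GcCandidateCheck() { }

          static void checkMarkedAsDeleted(boolean markedAsDeleted,
              String rootPath) throws IOException {
            if (!markedAsDeleted) {
              // The snapshot may have changed between the read-locked
              // chooseDeletedSnapshot() call and this write-locked GC step.
              throw new IOException("Cannot gcDeletedSnapshot for " + rootPath
                  + ": the snapshot is not marked as deleted");
            }
          }
        }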

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDeletionGc.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS;
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT;
+
+public class SnapshotDeletionGc {
+  public static final Logger LOG = LoggerFactory.getLogger(
+      SnapshotDeletionGc.class);
+
+  private final FSNamesystem namesystem;
+  private final long deletionOrderedGcPeriodMs;
+  private final AtomicReference<Timer> timer = new AtomicReference<>();
+
+  public SnapshotDeletionGc(FSNamesystem namesystem, Configuration conf) {
+    this.namesystem = namesystem;
+
+    this.deletionOrderedGcPeriodMs = conf.getLong(
+        DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+        DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT);
+    LOG.info("{} = {}", DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+        deletionOrderedGcPeriodMs);
+  }
+
+  public void schedule() {
+    if (timer.get() != null) {
+      return;
+    }
+    final Timer t = new Timer(getClass().getSimpleName(), true);
+    if (timer.compareAndSet(null, t)) {
+      LOG.info("Schedule at fixed rate {}",
+          StringUtils.formatTime(deletionOrderedGcPeriodMs));
+      t.scheduleAtFixedRate(new GcTask(),
+          deletionOrderedGcPeriodMs, deletionOrderedGcPeriodMs);
+    }
+  }
+
+  public void cancel() {
+    final Timer t = timer.getAndSet(null);
+    if (t != null) {
+      LOG.info("cancel");
+      t.cancel();
+    }
+  }
+
+  private void gcDeletedSnapshot(String name) {
+    final Snapshot.Root deleted;
+    namesystem.readLock();
+    try {
+      deleted = namesystem.getSnapshotManager().chooseDeletedSnapshot();
+    } finally {
+      namesystem.readUnlock();
+    }
+    if (deleted == null) {
+      LOG.trace("{}: no snapshots are marked as deleted.", name);
+      return;
+    }
+
+    final String snapshotRoot = deleted.getRootFullPathName();
+    final String snapshotName = deleted.getLocalName();
+    LOG.info("{}: delete snapshot {} from {}",
+        name, snapshotName, snapshotRoot);
+
+    try {
+      namesystem.gcDeletedSnapshot(snapshotRoot, snapshotName);
+    } catch (Throwable e) {
+      LOG.warn("Failed to gcDeletedSnapshot " + deleted.getFullPathName(), e);

Review comment:
       The user could also be deleting the snapshot, since we only held the read lock earlier; the gcDeletedSnapshot call here would then fail.
   
   We cannot kill a production system just because a snapshot cannot be deleted.
   
   I could change the log level to error.
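   
   A self-contained sketch of that degrade-gracefully behaviour (the helper below is illustrative; only the catch-and-log pattern mirrors the diff above):

        import org.slf4j.Logger;
        import org.slf4j.LoggerFactory;

        // Hedged sketch: never let a failed GC attempt escape the timer
        // thread; a racing user deletion simply makes this attempt a no-op.
        final class SafeGcAttempt {
          private static final Logger LOG =
              LoggerFactory.getLogger(SafeGcAttempt.class);

          private SafeGcAttempt() { }

          static void run(Runnable gcAttempt, String snapshotPath) {
            try {
              gcAttempt.run();
            } catch (Throwable e) {
              // Log at error level (per the discussion) and move on; the
              // periodic task will try another candidate on its next run.
              LOG.error("Failed to gcDeletedSnapshot " + snapshotPath, e);
            }
          }
        }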

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDeletionGc.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS;
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT;
+
+public class SnapshotDeletionGc {
+  public static final Logger LOG = LoggerFactory.getLogger(
+      SnapshotDeletionGc.class);
+
+  private final FSNamesystem namesystem;
+  private final long deletionOrderedGcPeriodMs;
+  private final AtomicReference<Timer> timer = new AtomicReference<>();
+
+  public SnapshotDeletionGc(FSNamesystem namesystem, Configuration conf) {
+    this.namesystem = namesystem;
+
+    this.deletionOrderedGcPeriodMs = conf.getLong(
+        DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+        DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT);
+    LOG.info("{} = {}", DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+        deletionOrderedGcPeriodMs);
+  }
+
+  public void schedule() {
+    if (timer.get() != null) {
+      return;
+    }
+    final Timer t = new Timer(getClass().getSimpleName(), true);
+    if (timer.compareAndSet(null, t)) {
+      LOG.info("Schedule at fixed rate {}",
+          StringUtils.formatTime(deletionOrderedGcPeriodMs));
+      t.scheduleAtFixedRate(new GcTask(),
+          deletionOrderedGcPeriodMs, deletionOrderedGcPeriodMs);
+    }
+  }
+
+  public void cancel() {
+    final Timer t = timer.getAndSet(null);
+    if (t != null) {
+      LOG.info("cancel");
+      t.cancel();
+    }
+  }
+
+  private void gcDeletedSnapshot(String name) {
+    final Snapshot.Root deleted;
+    namesystem.readLock();
+    try {
+      deleted = namesystem.getSnapshotManager().chooseDeletedSnapshot();
+    } finally {

Review comment:
       Sure

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -382,6 +395,14 @@ public void deleteSnapshot(final INodesInPath iip, final String snapshotName,
             EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
         return;
       }
+

Review comment:
       @jnp, what are your thoughts?

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -382,6 +395,14 @@ public void deleteSnapshot(final INodesInPath iip, final String snapshotName,
             EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
         return;
       }
+

Review comment:
       IMO, if the user is deleting the first snapshot, we should do it right away.  Otherwise, there is no way for the user to free up the resources held by the snapshots.
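   
   A hedged sketch of that decision; all names below are illustrative rather than the PR's actual API:

        // Hedged sketch of the ordered-deletion policy described above.
        final class OrderedDeletionPolicy {
          enum Action { DELETE_NOW, MARK_AS_DELETED }

          private OrderedDeletionPolicy() { }

          /**
           * With ordered deletion enabled, only the earliest (first) snapshot
           * is physically deleted right away so its resources are freed
           * immediately; any later snapshot is only marked as deleted and
           * reclaimed by the background GC task once it becomes the first.
           */
          static Action decide(boolean deletionOrderedEnabled,
              boolean isFirstSnapshot) {
            return (!deletionOrderedEnabled || isFirstSnapshot)
                ? Action.DELETE_NOW : Action.MARK_AS_DELETED;
          }
        }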




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-issues-help@hadoop.apache.org