You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2019/03/04 20:01:24 UTC

[hadoop] branch trunk updated: HDDS-1136 : Add metric counters to capture the RocksDB checkpointing statistics. Contributed by Aravindan Vijayan.

This is an automated email from the ASF dual-hosted git repository.

aengineer pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 10b802b  HDDS-1136 : Add metric counters to capture the RocksDB checkpointing statistics. Contributed by Aravindan Vijayan.
10b802b is described below

commit 10b802b84b56a328f76ec9759597077d7652711e
Author: Anu Engineer <ae...@apache.org>
AuthorDate: Mon Mar 4 12:00:16 2019 -0800

    HDDS-1136 : Add metric counters to capture the RocksDB checkpointing statistics.
    Contributed by Aravindan Vijayan.
---
 .../org/apache/hadoop/utils/db/DBCheckpoint.java   |   5 +
 .../hadoop/utils/db/RDBCheckpointManager.java      |  23 ++-
 .../hadoop/ozone/om/TestOMDbCheckpointServlet.java | 176 +++++++++++++++++++++
 .../hadoop/ozone/om/OMDBCheckpointServlet.java     |  26 ++-
 .../java/org/apache/hadoop/ozone/om/OMMetrics.java |  32 ++++
 5 files changed, 258 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java
index e25ac92..a3b197a 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java
@@ -43,6 +43,11 @@ public interface DBCheckpoint {
   long getLatestSequenceNumber();
 
   /**
+   * Time taken in milliseconds for the checkpoint to be created.
+   */
+  long checkpointCreationTimeTaken();
+
+  /**
    * Destroy the contents of the specified checkpoint to ensure
    * proper cleanup of the footprint on disk.
    *
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java
index d44ebaf..ce716c3 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java
@@ -22,6 +22,8 @@ package org.apache.hadoop.utils.db;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.time.Duration;
+import java.time.Instant;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -41,7 +43,6 @@ public class RDBCheckpointManager {
   public static final String RDB_CHECKPOINT_DIR_PREFIX = "rdb_checkpoint_";
   private static final Logger LOG =
       LoggerFactory.getLogger(RDBCheckpointManager.class);
-  public static final String JAVA_TMP_DIR = "java.io.tmpdir";
   private String checkpointNamePrefix = "";
 
   public RDBCheckpointManager(RocksDB rocksDB) {
@@ -79,12 +80,19 @@ public class RDBCheckpointManager {
       checkpointDir += "_" + RDB_CHECKPOINT_DIR_PREFIX + currentTime;
 
       Path checkpointPath = Paths.get(parentDir, checkpointDir);
+      Instant start = Instant.now();
       checkpoint.createCheckpoint(checkpointPath.toString());
+      Instant end = Instant.now();
+
+      long duration = Duration.between(start, end).toMillis();
+      LOG.debug("Created checkpoint at " + checkpointPath.toString() + " in "
+          + duration + " milliseconds");
 
       return new RocksDBCheckpoint(
           checkpointPath,
           currentTime,
-          db.getLatestSequenceNumber()); //Best guesstimate here. Not accurate.
+          db.getLatestSequenceNumber(), //Best guesstimate here. Not accurate.
+          duration);
 
     } catch (RocksDBException e) {
       LOG.error("Unable to create RocksDB Snapshot.", e);
@@ -97,13 +105,16 @@ public class RDBCheckpointManager {
     private Path checkpointLocation;
     private long checkpointTimestamp;
     private long latestSequenceNumber;
+    private long checkpointCreationTimeTaken;
 
     RocksDBCheckpoint(Path checkpointLocation,
                               long snapshotTimestamp,
-                              long latestSequenceNumber) {
+                              long latestSequenceNumber,
+                              long checkpointCreationTimeTaken) {
       this.checkpointLocation = checkpointLocation;
       this.checkpointTimestamp = snapshotTimestamp;
       this.latestSequenceNumber = latestSequenceNumber;
+      this.checkpointCreationTimeTaken = checkpointCreationTimeTaken;
     }
 
     @Override
@@ -122,7 +133,13 @@ public class RDBCheckpointManager {
     }
 
     @Override
+    public long checkpointCreationTimeTaken() {
+      return checkpointCreationTimeTaken;
+    }
+
+    @Override
     public void cleanupCheckpoint() throws IOException {
+      LOG.debug("Cleaning up checkpoint at " + checkpointLocation.toString());
       FileUtils.deleteDirectory(checkpointLocation.toFile());
     }
   }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java
new file mode 100644
index 0000000..75dd880
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java
@@ -0,0 +1,176 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.om;
+
+import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.
+    OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.UUID;
+
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.ServletOutputStream;
+import javax.servlet.WriteListener;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.Timeout;
+import org.mockito.Matchers;
+
+import static org.apache.hadoop.ozone.OzoneConsts.
+    OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
+import static org.mockito.Mockito.doCallRealMethod;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Class used for testing the OM DB Checkpoint provider servlet.
+ */
+public class TestOMDbCheckpointServlet {
+  private MiniOzoneCluster cluster = null;
+  private OMMetrics omMetrics;
+  private OzoneConfiguration conf;
+  private String clusterId;
+  private String scmId;
+  private String omId;
+
+  @Rule
+  public Timeout timeout = new Timeout(60000);
+
+  /**
+   * Create a MiniOzoneCluster for testing.
+   * <p>
+   * Ozone ACLs are enabled by setting OZONE_ACL_ENABLED = true
+   *
+   * @throws Exception
+   */
+  @Before
+  public void init() throws Exception {
+    conf = new OzoneConfiguration();
+    clusterId = UUID.randomUUID().toString();
+    scmId = UUID.randomUUID().toString();
+    omId = UUID.randomUUID().toString();
+    conf.setBoolean(OZONE_ACL_ENABLED, true);
+    conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
+    cluster = MiniOzoneCluster.newBuilder(conf)
+        .setClusterId(clusterId)
+        .setScmId(scmId)
+        .setOmId(omId)
+        .build();
+    cluster.waitForClusterToBeReady();
+    omMetrics = cluster.getOzoneManager().getMetrics();
+  }
+
+  /**
+   * Shutdown MiniOzoneCluster.
+   */
+  @After
+  public void shutdown() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testDoGet() throws ServletException, IOException {
+
+    File tempFile = null;
+    try {
+      OMDBCheckpointServlet omDbCheckpointServletMock =
+          mock(OMDBCheckpointServlet.class);
+
+      doCallRealMethod().when(omDbCheckpointServletMock).init();
+
+      HttpServletRequest requestMock = mock(HttpServletRequest.class);
+      HttpServletResponse responseMock = mock(HttpServletResponse.class);
+
+      ServletContext servletContextMock = mock(ServletContext.class);
+      when(omDbCheckpointServletMock.getServletContext())
+          .thenReturn(servletContextMock);
+
+      when(servletContextMock.getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE))
+          .thenReturn(cluster.getOzoneManager());
+      when(requestMock.getParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH))
+          .thenReturn("true");
+      doNothing().when(responseMock).setContentType("application/x-tgz");
+      doNothing().when(responseMock).setHeader(Matchers.anyString(),
+          Matchers.anyString());
+
+      tempFile = File.createTempFile("testDoGet_" + System
+          .currentTimeMillis(), ".tar.gz");
+
+      FileOutputStream fileOutputStream = new FileOutputStream(tempFile);
+      when(responseMock.getOutputStream()).thenReturn(
+          new ServletOutputStream() {
+            @Override
+            public boolean isReady() {
+              return true;
+            }
+
+            @Override
+            public void setWriteListener(WriteListener writeListener) {
+            }
+
+            @Override
+            public void write(int b) throws IOException {
+              fileOutputStream.write(b);
+            }
+          });
+
+      doCallRealMethod().when(omDbCheckpointServletMock).doGet(requestMock,
+          responseMock);
+
+      omDbCheckpointServletMock.init();
+
+      Assert.assertTrue(
+          omMetrics.getLastCheckpointCreationTimeTaken() == 0);
+      Assert.assertTrue(
+          omMetrics.getLastCheckpointTarOperationTimeTaken() == 0);
+      Assert.assertTrue(
+          omMetrics.getLastCheckpointStreamingTimeTaken() == 0);
+
+      omDbCheckpointServletMock.doGet(requestMock, responseMock);
+
+      Assert.assertTrue(tempFile.length() > 0);
+      Assert.assertTrue(
+          omMetrics.getLastCheckpointCreationTimeTaken() > 0);
+      Assert.assertTrue(
+          omMetrics.getLastCheckpointTarOperationTimeTaken() > 0);
+      Assert.assertTrue(
+          omMetrics.getLastCheckpointStreamingTimeTaken() > 0);
+    } finally {
+      FileUtils.deleteQuietly(tempFile);
+    }
+
+  }
+}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java
index edcb7ef..96acfb3 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java
@@ -24,6 +24,8 @@ import static org.apache.hadoop.ozone.OzoneConsts.
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.time.Duration;
+import java.time.Instant;
 
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServlet;
@@ -53,6 +55,7 @@ public class OMDBCheckpointServlet extends HttpServlet {
   private static final long serialVersionUID = 1L;
 
   private transient DBStore omDbStore;
+  private transient OMMetrics omMetrics;
   private transient DataTransferThrottler throttler = null;
 
   @Override
@@ -67,6 +70,8 @@ public class OMDBCheckpointServlet extends HttpServlet {
     }
 
     omDbStore = om.getMetadataManager().getStore();
+    omMetrics = om.getMetrics();
+
     OzoneConfiguration configuration = om.getConfiguration();
     long transferBandwidth = configuration.getLongBytes(
         OMConfigKeys.OZONE_DB_CHECKPOINT_TRANSFER_RATE_KEY,
@@ -112,19 +117,38 @@ public class OMDBCheckpointServlet extends HttpServlet {
         response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
         return;
       }
+      omMetrics.setLastCheckpointCreationTimeTaken(
+          checkpoint.checkpointCreationTimeTaken());
+
+      Instant start = Instant.now();
       checkPointTarFile = OmUtils.createTarFile(
           checkpoint.getCheckpointLocation());
-      LOG.info("Tar location = " + checkPointTarFile.getAbsolutePath());
+      Instant end = Instant.now();
+
+      long duration = Duration.between(start, end).toMillis();
+      LOG.debug("Time taken to archive the checkpoint : " + duration +
+          " milliseconds");
+      LOG.info("Checkpoint Tar location = " +
+          checkPointTarFile.getAbsolutePath());
+      omMetrics.setLastCheckpointTarOperationTimeTaken(duration);
+
       response.setContentType("application/x-tgz");
       response.setHeader("Content-Disposition",
           "attachment; filename=\"" +
               checkPointTarFile.getName() + "\"");
 
       checkpointFileInputStream = new FileInputStream(checkPointTarFile);
+      start = Instant.now();
       TransferFsImage.copyFileToStream(response.getOutputStream(),
           checkPointTarFile,
           checkpointFileInputStream,
           throttler);
+      end = Instant.now();
+
+      duration = Duration.between(start, end).toMillis();
+      LOG.debug("Time taken to write the checkpoint to response output " +
+          "stream: " + duration + " milliseconds");
+      omMetrics.setLastCheckpointStreamingTimeTaken(duration);
 
       checkpoint.cleanupCheckpoint();
     } catch (IOException e) {
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
index 2ef0aca..946b898 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
 
 /**
  * This class is for maintaining Ozone Manager statistics.
@@ -105,6 +106,10 @@ public class OMMetrics {
   // few minutes before restart may not be included in this count.
   private @Metric MutableCounterLong numKeys;
 
+  // Metrics to track checkpointing statistics from last run.
+  private @Metric MutableGaugeLong lastCheckpointCreationTimeTaken;
+  private @Metric MutableGaugeLong lastCheckpointTarOperationTimeTaken;
+  private @Metric MutableGaugeLong lastCheckpointStreamingTimeTaken;
 
   public OMMetrics() {
   }
@@ -390,6 +395,18 @@ public class OMMetrics {
     numGetServiceListFails.incr();
   }
 
+  public void setLastCheckpointCreationTimeTaken(long val) {
+    this.lastCheckpointCreationTimeTaken.set(val);
+  }
+
+  public void setLastCheckpointTarOperationTimeTaken(long val) {
+    this.lastCheckpointTarOperationTimeTaken.set(val);
+  }
+
+  public void setLastCheckpointStreamingTimeTaken(long val) {
+    this.lastCheckpointStreamingTimeTaken.set(val);
+  }
+
   @VisibleForTesting
   public long getNumVolumeCreates() {
     return numVolumeCreates.value();
@@ -606,6 +623,21 @@ public class OMMetrics {
     return numAbortMultipartUploadFails.value();
   }
 
+  @VisibleForTesting
+  public long getLastCheckpointCreationTimeTaken() {
+    return lastCheckpointCreationTimeTaken.value();
+  }
+
+  @VisibleForTesting
+  public long getLastCheckpointTarOperationTimeTaken() {
+    return lastCheckpointTarOperationTimeTaken.value();
+  }
+
+  @VisibleForTesting
+  public long getLastCheckpointStreamingTimeTaken() {
+    return lastCheckpointStreamingTimeTaken.value();
+  }
+
   public void unRegister() {
     MetricsSystem ms = DefaultMetricsSystem.instance();
     ms.unregisterSource(SOURCE_NAME);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org