You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2019/03/04 20:01:24 UTC
[hadoop] branch trunk updated: HDDS-1136 : Add metric counters to
capture the RocksDB checkpointing statistics. Contributed by Aravindan
Vijayan.
This is an automated email from the ASF dual-hosted git repository.
aengineer pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 10b802b HDDS-1136 : Add metric counters to capture the RocksDB checkpointing statistics. Contributed by Aravindan Vijayan.
10b802b is described below
commit 10b802b84b56a328f76ec9759597077d7652711e
Author: Anu Engineer <ae...@apache.org>
AuthorDate: Mon Mar 4 12:00:16 2019 -0800
HDDS-1136 : Add metric counters to capture the RocksDB checkpointing statistics.
Contributed by Aravindan Vijayan.
---
.../org/apache/hadoop/utils/db/DBCheckpoint.java | 5 +
.../hadoop/utils/db/RDBCheckpointManager.java | 23 ++-
.../hadoop/ozone/om/TestOMDbCheckpointServlet.java | 176 +++++++++++++++++++++
.../hadoop/ozone/om/OMDBCheckpointServlet.java | 26 ++-
.../java/org/apache/hadoop/ozone/om/OMMetrics.java | 32 ++++
5 files changed, 258 insertions(+), 4 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java
index e25ac92..a3b197a 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java
@@ -43,6 +43,11 @@ public interface DBCheckpoint {
long getLatestSequenceNumber();
/**
+ * Time taken in milliseconds for the checkpoint to be created.
+ */
+ long checkpointCreationTimeTaken();
+
+ /**
* Destroy the contents of the specified checkpoint to ensure
* proper cleanup of the footprint on disk.
*
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java
index d44ebaf..ce716c3 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java
@@ -22,6 +22,8 @@ package org.apache.hadoop.utils.db;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.time.Duration;
+import java.time.Instant;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
@@ -41,7 +43,6 @@ public class RDBCheckpointManager {
public static final String RDB_CHECKPOINT_DIR_PREFIX = "rdb_checkpoint_";
private static final Logger LOG =
LoggerFactory.getLogger(RDBCheckpointManager.class);
- public static final String JAVA_TMP_DIR = "java.io.tmpdir";
private String checkpointNamePrefix = "";
public RDBCheckpointManager(RocksDB rocksDB) {
@@ -79,12 +80,19 @@ public class RDBCheckpointManager {
checkpointDir += "_" + RDB_CHECKPOINT_DIR_PREFIX + currentTime;
Path checkpointPath = Paths.get(parentDir, checkpointDir);
+ Instant start = Instant.now();
checkpoint.createCheckpoint(checkpointPath.toString());
+ Instant end = Instant.now();
+
+ long duration = Duration.between(start, end).toMillis();
+ LOG.debug("Created checkpoint at " + checkpointPath.toString() + " in "
+ + duration + " milliseconds");
return new RocksDBCheckpoint(
checkpointPath,
currentTime,
- db.getLatestSequenceNumber()); //Best guesstimate here. Not accurate.
+ db.getLatestSequenceNumber(), //Best guesstimate here. Not accurate.
+ duration);
} catch (RocksDBException e) {
LOG.error("Unable to create RocksDB Snapshot.", e);
@@ -97,13 +105,16 @@ public class RDBCheckpointManager {
private Path checkpointLocation;
private long checkpointTimestamp;
private long latestSequenceNumber;
+ private long checkpointCreationTimeTaken;
RocksDBCheckpoint(Path checkpointLocation,
long snapshotTimestamp,
- long latestSequenceNumber) {
+ long latestSequenceNumber,
+ long checkpointCreationTimeTaken) {
this.checkpointLocation = checkpointLocation;
this.checkpointTimestamp = snapshotTimestamp;
this.latestSequenceNumber = latestSequenceNumber;
+ this.checkpointCreationTimeTaken = checkpointCreationTimeTaken;
}
@Override
@@ -122,7 +133,13 @@ public class RDBCheckpointManager {
}
@Override
+ public long checkpointCreationTimeTaken() {
+ return checkpointCreationTimeTaken;
+ }
+
+ @Override
public void cleanupCheckpoint() throws IOException {
+ LOG.debug("Cleaning up checkpoint at " + checkpointLocation.toString());
FileUtils.deleteDirectory(checkpointLocation.toFile());
}
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java
new file mode 100644
index 0000000..75dd880
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java
@@ -0,0 +1,176 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.om;
+
+import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.
+ OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.UUID;
+
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.ServletOutputStream;
+import javax.servlet.WriteListener;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.Timeout;
+import org.mockito.Matchers;
+
+import static org.apache.hadoop.ozone.OzoneConsts.
+ OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
+import static org.mockito.Mockito.doCallRealMethod;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Class used for testing the OM DB Checkpoint provider servlet.
+ */
+public class TestOMDbCheckpointServlet {
+ private MiniOzoneCluster cluster = null;
+ private OMMetrics omMetrics;
+ private OzoneConfiguration conf;
+ private String clusterId;
+ private String scmId;
+ private String omId;
+
+ @Rule
+ public Timeout timeout = new Timeout(60000);
+
+ /**
+ * Create a MiniDFSCluster for testing.
+ * <p>
+ * Ozone is made active by setting OZONE_ENABLED = true
+ *
+ * @throws IOException
+ */
+ @Before
+ public void init() throws Exception {
+ conf = new OzoneConfiguration();
+ clusterId = UUID.randomUUID().toString();
+ scmId = UUID.randomUUID().toString();
+ omId = UUID.randomUUID().toString();
+ conf.setBoolean(OZONE_ACL_ENABLED, true);
+ conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
+ cluster = MiniOzoneCluster.newBuilder(conf)
+ .setClusterId(clusterId)
+ .setScmId(scmId)
+ .setOmId(omId)
+ .build();
+ cluster.waitForClusterToBeReady();
+ omMetrics = cluster.getOzoneManager().getMetrics();
+ }
+
+ /**
+ * Shutdown MiniDFSCluster.
+ */
+ @After
+ public void shutdown() {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+
+ @Test
+ public void testDoGet() throws ServletException, IOException {
+
+ File tempFile = null;
+ try {
+ OMDBCheckpointServlet omDbCheckpointServletMock =
+ mock(OMDBCheckpointServlet.class);
+
+ doCallRealMethod().when(omDbCheckpointServletMock).init();
+
+ HttpServletRequest requestMock = mock(HttpServletRequest.class);
+ HttpServletResponse responseMock = mock(HttpServletResponse.class);
+
+ ServletContext servletContextMock = mock(ServletContext.class);
+ when(omDbCheckpointServletMock.getServletContext())
+ .thenReturn(servletContextMock);
+
+ when(servletContextMock.getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE))
+ .thenReturn(cluster.getOzoneManager());
+ when(requestMock.getParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH))
+ .thenReturn("true");
+ doNothing().when(responseMock).setContentType("application/x-tgz");
+ doNothing().when(responseMock).setHeader(Matchers.anyString(),
+ Matchers.anyString());
+
+ tempFile = File.createTempFile("testDoGet_" + System
+ .currentTimeMillis(), ".tar.gz");
+
+ FileOutputStream fileOutputStream = new FileOutputStream(tempFile);
+ when(responseMock.getOutputStream()).thenReturn(
+ new ServletOutputStream() {
+ @Override
+ public boolean isReady() {
+ return true;
+ }
+
+ @Override
+ public void setWriteListener(WriteListener writeListener) {
+ }
+
+ @Override
+ public void write(int b) throws IOException {
+ fileOutputStream.write(b);
+ }
+ });
+
+ doCallRealMethod().when(omDbCheckpointServletMock).doGet(requestMock,
+ responseMock);
+
+ omDbCheckpointServletMock.init();
+
+ Assert.assertTrue(
+ omMetrics.getLastCheckpointCreationTimeTaken() == 0);
+ Assert.assertTrue(
+ omMetrics.getLastCheckpointTarOperationTimeTaken() == 0);
+ Assert.assertTrue(
+ omMetrics.getLastCheckpointStreamingTimeTaken() == 0);
+
+ omDbCheckpointServletMock.doGet(requestMock, responseMock);
+
+ Assert.assertTrue(tempFile.length() > 0);
+ Assert.assertTrue(
+ omMetrics.getLastCheckpointCreationTimeTaken() > 0);
+ Assert.assertTrue(
+ omMetrics.getLastCheckpointTarOperationTimeTaken() > 0);
+ Assert.assertTrue(
+ omMetrics.getLastCheckpointStreamingTimeTaken() > 0);
+ } finally {
+ FileUtils.deleteQuietly(tempFile);
+ }
+
+ }
+}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java
index edcb7ef..96acfb3 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java
@@ -24,6 +24,8 @@ import static org.apache.hadoop.ozone.OzoneConsts.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.time.Duration;
+import java.time.Instant;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
@@ -53,6 +55,7 @@ public class OMDBCheckpointServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
private transient DBStore omDbStore;
+ private transient OMMetrics omMetrics;
private transient DataTransferThrottler throttler = null;
@Override
@@ -67,6 +70,8 @@ public class OMDBCheckpointServlet extends HttpServlet {
}
omDbStore = om.getMetadataManager().getStore();
+ omMetrics = om.getMetrics();
+
OzoneConfiguration configuration = om.getConfiguration();
long transferBandwidth = configuration.getLongBytes(
OMConfigKeys.OZONE_DB_CHECKPOINT_TRANSFER_RATE_KEY,
@@ -112,19 +117,38 @@ public class OMDBCheckpointServlet extends HttpServlet {
response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
return;
}
+ omMetrics.setLastCheckpointCreationTimeTaken(
+ checkpoint.checkpointCreationTimeTaken());
+
+ Instant start = Instant.now();
checkPointTarFile = OmUtils.createTarFile(
checkpoint.getCheckpointLocation());
- LOG.info("Tar location = " + checkPointTarFile.getAbsolutePath());
+ Instant end = Instant.now();
+
+ long duration = Duration.between(start, end).toMillis();
+ LOG.debug("Time taken to archive the checkpoint : " + duration +
+ " milliseconds");
+ LOG.info("Checkpoint Tar location = " +
+ checkPointTarFile.getAbsolutePath());
+ omMetrics.setLastCheckpointTarOperationTimeTaken(duration);
+
response.setContentType("application/x-tgz");
response.setHeader("Content-Disposition",
"attachment; filename=\"" +
checkPointTarFile.getName() + "\"");
checkpointFileInputStream = new FileInputStream(checkPointTarFile);
+ start = Instant.now();
TransferFsImage.copyFileToStream(response.getOutputStream(),
checkPointTarFile,
checkpointFileInputStream,
throttler);
+ end = Instant.now();
+
+ duration = Duration.between(start, end).toMillis();
+ LOG.debug("Time taken to write the checkpoint to response output " +
+ "stream: " + duration + " milliseconds");
+ omMetrics.setLastCheckpointStreamingTimeTaken(duration);
checkpoint.cleanupCheckpoint();
} catch (IOException e) {
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
index 2ef0aca..946b898 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
/**
* This class is for maintaining Ozone Manager statistics.
@@ -105,6 +106,10 @@ public class OMMetrics {
// few minutes before restart may not be included in this count.
private @Metric MutableCounterLong numKeys;
+ // Metrics to track checkpointing statistics from last run.
+ private @Metric MutableGaugeLong lastCheckpointCreationTimeTaken;
+ private @Metric MutableGaugeLong lastCheckpointTarOperationTimeTaken;
+ private @Metric MutableGaugeLong lastCheckpointStreamingTimeTaken;
public OMMetrics() {
}
@@ -390,6 +395,18 @@ public class OMMetrics {
numGetServiceListFails.incr();
}
+ public void setLastCheckpointCreationTimeTaken(long val) {
+ this.lastCheckpointCreationTimeTaken.set(val);
+ }
+
+ public void setLastCheckpointTarOperationTimeTaken(long val) {
+ this.lastCheckpointTarOperationTimeTaken.set(val);
+ }
+
+ public void setLastCheckpointStreamingTimeTaken(long val) {
+ this.lastCheckpointStreamingTimeTaken.set(val);
+ }
+
@VisibleForTesting
public long getNumVolumeCreates() {
return numVolumeCreates.value();
@@ -606,6 +623,21 @@ public class OMMetrics {
return numAbortMultipartUploadFails.value();
}
+ @VisibleForTesting
+ public long getLastCheckpointCreationTimeTaken() {
+ return lastCheckpointCreationTimeTaken.value();
+ }
+
+ @VisibleForTesting
+ public long getLastCheckpointTarOperationTimeTaken() {
+ return lastCheckpointTarOperationTimeTaken.value();
+ }
+
+ @VisibleForTesting
+ public long getLastCheckpointStreamingTimeTaken() {
+ return lastCheckpointStreamingTimeTaken.value();
+ }
+
public void unRegister() {
MetricsSystem ms = DefaultMetricsSystem.instance();
ms.unregisterSource(SOURCE_NAME);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org