You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by ad...@apache.org on 2022/10/12 19:10:11 UTC

[ozone] branch master updated: HDDS-7271. Ozone Integration test shows memory leak (graceful shutdown cleanup) (#3826)

This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 15217fe59a HDDS-7271. Ozone Integration test shows memory leak (graceful shutdown cleanup) (#3826)
15217fe59a is described below

commit 15217fe59a575c9ff1c3b64fcc08843f645c78d4
Author: Sumit Agrawal <su...@gmail.com>
AuthorDate: Thu Oct 13 00:40:04 2022 +0530

    HDDS-7271. Ozone Integration test shows memory leak (graceful shutdown cleanup) (#3826)
---
 .../java/org/apache/hadoop/ozone/HddsDatanodeService.java  |  1 +
 .../ozone/container/common/report/ReportPublisher.java     |  6 ++----
 .../common/statemachine/EndpointStateMachine.java          |  1 +
 .../commandhandler/DeleteBlocksCommandHandler.java         |  1 +
 .../ozone/container/common/report/TestReportPublisher.java |  4 ++--
 .../hadoop/hdds/server/http/RatisDropwizardExports.java    | 14 ++++++++++++++
 .../hadoop/hdds/scm/block/SCMBlockDeletingService.java     |  2 +-
 .../apache/hadoop/hdds/scm/ha/BackgroundSCMService.java    |  2 +-
 .../hadoop/hdds/scm/server/StorageContainerManager.java    |  2 ++
 .../statemachine/commandhandler/TestBlockDeletion.java     |  5 +++--
 .../apache/hadoop/ozone/dn/ratis/TestDnRatisLogParser.java |  1 -
 .../main/java/org/apache/hadoop/ozone/om/OzoneManager.java |  1 +
 12 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
index c4962b046a..d36a178550 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
@@ -584,6 +584,7 @@ public class HddsDatanodeService extends GenericCli implements ServicePlugin {
       } catch (Exception ex) {
         LOG.error("Datanode CRL store stop failed", ex);
       }
+      RatisDropwizardExports.clear(ratisMetricsMap);
     }
   }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java
index 3d3c819c05..a2553ff788 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java
@@ -54,8 +54,8 @@ public abstract class ReportPublisher<T extends Message>
                    ScheduledExecutorService executorService) {
     this.context = stateContext;
     this.executor = executorService;
-    this.executor.schedule(this,
-        getReportFrequency(), TimeUnit.MILLISECONDS);
+    this.executor.scheduleAtFixedRate(this,
+        getReportFrequency(), getReportFrequency(), TimeUnit.MILLISECONDS);
   }
 
   public void setConf(ConfigurationSource conf) {
@@ -71,8 +71,6 @@ public abstract class ReportPublisher<T extends Message>
     if (!executor.isShutdown() &&
         (context.getState() != DatanodeStates.SHUTDOWN)) {
       publishReport();
-      executor.schedule(this,
-          getReportFrequency(), TimeUnit.MILLISECONDS);
     }
   }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java
index 8b3e5dd657..64a65f5b77 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java
@@ -161,6 +161,7 @@ public class EndpointStateMachine
     if (endPoint != null) {
       endPoint.close();
     }
+    executorService.shutdown();
   }
 
   /**
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
index 3e18574a9a..de92d1793f 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
@@ -183,6 +183,7 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
           Thread.sleep(2000);
         } catch (InterruptedException e) {
           Thread.currentThread().interrupt();
+          break;
         }
       }
     }
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java
index 56fc0761e0..d611caf126 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/report/TestReportPublisher.java
@@ -98,8 +98,8 @@ public class TestReportPublisher {
     ScheduledExecutorService dummyExecutorService = Mockito.mock(
         ScheduledExecutorService.class);
     publisher.init(dummyContext, dummyExecutorService);
-    verify(dummyExecutorService, times(1)).schedule(publisher,
-        0, TimeUnit.MILLISECONDS);
+    verify(dummyExecutorService, times(1)).scheduleAtFixedRate(publisher,
+        0, 0, TimeUnit.MILLISECONDS);
   }
 
   @Test
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java
index 54767b8ae4..76e65a1d72 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisDropwizardExports.java
@@ -54,6 +54,20 @@ public class RatisDropwizardExports extends DropwizardExports {
         r2 -> deregisterDropwizard(r2, ratisMetricsMap));
   }
 
+  public static void clear(Map<String, RatisDropwizardExports>
+                               ratisMetricsMap) {
+    ratisMetricsMap.entrySet().stream().forEach(e -> {
+      // Remove each entry from the map and unregister only collectors
+      // that were actually removed, because unregistering a collector
+      // twice causes a NullPointerException in the registry.
+      Collector c = ratisMetricsMap.remove(e.getKey());
+      if (c != null) {
+        CollectorRegistry.defaultRegistry.unregister(c);
+      }
+    });
+    MetricRegistries.global().clear();
+  }
+
   private static void registerDropwizard(RatisMetricRegistry registry,
       Map<String, RatisDropwizardExports> ratisMetricsMap) {
     RatisDropwizardExports rde = new RatisDropwizardExports(
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
index 1d1ead8b21..e6f4decd95 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
@@ -229,7 +229,7 @@ public class SCMBlockDeletingService extends BackgroundService
 
   @Override
   public void stop() {
-    throw new RuntimeException("Not supported operation.");
+    shutdown();
   }
 
   @VisibleForTesting
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/BackgroundSCMService.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/BackgroundSCMService.java
index ff12fefd88..ab476f84c5 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/BackgroundSCMService.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/BackgroundSCMService.java
@@ -126,8 +126,8 @@ public final class BackgroundSCMService implements SCMService {
         log.info("{} Service is not running, skip stop.", getServiceName());
         return;
       }
-      backgroundThread.interrupt();
     }
+    backgroundThread.interrupt();
     log.info("Stopping {} Service.", getServiceName());
   }
 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 403921fd7a..1f06cbb2f8 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -1648,6 +1648,8 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
     }
 
     scmSafeModeManager.stop();
+    serviceManager.stop();
+    RatisDropwizardExports.clear(ratisMetricsMap);
   }
 
   @Override
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java
index 854ea997fd..f7dee15637 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java
@@ -367,12 +367,13 @@ public class TestBlockDeletion {
         LogCapturer.captureLogs(LegacyReplicationManager.LOG);
     logCapturer.clearOutput();
 
+    Thread.sleep(2000);
     scm.getReplicationManager().processAll();
     ((EventQueue)scm.getEventQueue()).processAll(1000);
     GenericTestUtils.waitFor(() -> logCapturer.getOutput()
-        .contains("Resend delete Container"), 500, 3000);
+        .contains("Resend delete Container"), 500, 5000);
     cluster.restartHddsDatanode(0, true);
-    Thread.sleep(1000);
+    Thread.sleep(2000);
 
     scm.getReplicationManager().processAll();
     ((EventQueue)scm.getEventQueue()).processAll(1000);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/ratis/TestDnRatisLogParser.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/ratis/TestDnRatisLogParser.java
index efbb73786c..0fa152cb34 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/ratis/TestDnRatisLogParser.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/ratis/TestDnRatisLogParser.java
@@ -75,7 +75,6 @@ public class TestDnRatisLogParser {
 
   @Test
   public void testRatisLogParsing() throws Exception {
-    cluster.stop();
     OzoneConfiguration conf = cluster.getHddsDatanodes().get(0).getConf();
     String path =
         conf.get(OzoneConfigKeys.DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR);
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index d089569a4e..6e1bd37af2 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -2081,6 +2081,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
         omSnapshotProvider.stop();
       }
       OMPerformanceMetrics.unregister();
+      RatisDropwizardExports.clear(ratisMetricsMap);
     } catch (Exception e) {
       LOG.error("OzoneManager stop failed.", e);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org