You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ro...@apache.org on 2020/09/01 10:29:57 UTC

[cloudstack] branch 4.13 updated: influxdb: Avoid out of memory by influxDB (#4291)

This is an automated email from the ASF dual-hosted git repository.

rohit pushed a commit to branch 4.13
in repository https://gitbox.apache.org/repos/asf/cloudstack.git


The following commit(s) were added to refs/heads/4.13 by this push:
     new 5c29d5b  influxdb: Avoid out of memory by influxDB (#4291)
5c29d5b is described below

commit 5c29d5ba453b3441f2fd84232f9d833c288c72ce
Author: Gabriel Beims Bräscher <ga...@apache.org>
AuthorDate: Tue Sep 1 07:29:43 2020 -0300

    influxdb: Avoid out of memory by influxDB (#4291)
    
    After a few hours running with InfluxDB configured, CloudStack hangs because an OutOfMemoryError is raised. The error occurs at com.cloud.server.StatsCollector.writeBatches(StatsCollector.java:1510):
    
    2020-08-12 21:19:00,972 ERROR [c.c.s.StatsCollector] (StatsCollector-6:ctx-0a4cfe6a) (logid:03a7ba48) Error trying to retrieve host stats
    java.lang.OutOfMemoryError: unable to create new native thread
            ...
            at org.influxdb.impl.BatchProcessor.<init>(BatchProcessor.java:294)
            at org.influxdb.impl.BatchProcessor$Builder.build(BatchProcessor.java:201)
            at org.influxdb.impl.InfluxDBImpl.enableBatch(InfluxDBImpl.java:311)
            at com.cloud.server.StatsCollector.writeBatches(StatsCollector.java:1510)
            at com.cloud.server.StatsCollector$AbstractStatsCollector.sendMetricsToInfluxdb(StatsCollector.java:1351)
            at com.cloud.server.StatsCollector$HostCollector.runInContext(StatsCollector.java:522)
    Context on InfluxDB batching: enabling batch mode on InfluxDB speeds up writes, but it requires care to avoid leaving zombie threads behind.
    
    Cause and solution: the batching feature creates an internal thread pool that needs to be shut down explicitly; therefore, every connection must be closed with influxDB.close() once the write has finished.
---
 pom.xml                                            |  2 +-
 .../main/java/com/cloud/server/StatsCollector.java | 30 +++++++++++++---------
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/pom.xml b/pom.xml
index c38b840..64a34f7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
         <cs.guava.version>23.6-jre</cs.guava.version>
         <cs.httpclient.version>4.5.4</cs.httpclient.version>
         <cs.httpcore.version>4.4.8</cs.httpcore.version>
-        <cs.influxdb-java.version>2.15</cs.influxdb-java.version>
+        <cs.influxdb-java.version>2.20</cs.influxdb-java.version>
         <cs.jackson.version>2.9.2</cs.jackson.version>
         <cs.jasypt.version>1.9.2</cs.jasypt.version>
         <cs.java-ipv6.version>0.16</cs.java-ipv6.version>
diff --git a/server/src/main/java/com/cloud/server/StatsCollector.java b/server/src/main/java/com/cloud/server/StatsCollector.java
index 5683106..3937bd9 100644
--- a/server/src/main/java/com/cloud/server/StatsCollector.java
+++ b/server/src/main/java/com/cloud/server/StatsCollector.java
@@ -1334,21 +1334,25 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
         protected void sendMetricsToInfluxdb(Map<Object, Object> metrics) {
             InfluxDB influxDbConnection = createInfluxDbConnection();
 
-            Pong response = influxDbConnection.ping();
-            if (response.getVersion().equalsIgnoreCase("unknown")) {
-                throw new CloudRuntimeException(String.format("Cannot ping influxdb host %s:%s.", externalStatsHost, externalStatsPort));
-            }
+            try {
+                Pong response = influxDbConnection.ping();
+                if (response.getVersion().equalsIgnoreCase("unknown")) {
+                    throw new CloudRuntimeException(String.format("Cannot ping influxdb host %s:%s.", externalStatsHost, externalStatsPort));
+                }
 
-            Collection<Object> metricsObjects = metrics.values();
-            List<Point> points = new ArrayList<>();
+                Collection<Object> metricsObjects = metrics.values();
+                List<Point> points = new ArrayList<>();
 
-            s_logger.debug(String.format("Sending stats to %s host %s:%s", externalStatsType, externalStatsHost, externalStatsPort));
+                s_logger.debug(String.format("Sending stats to %s host %s:%s", externalStatsType, externalStatsHost, externalStatsPort));
 
-            for (Object metricsObject : metricsObjects) {
-                Point vmPoint = creteInfluxDbPoint(metricsObject);
-                points.add(vmPoint);
+                for (Object metricsObject : metricsObjects) {
+                    Point vmPoint = creteInfluxDbPoint(metricsObject);
+                    points.add(vmPoint);
+                }
+                writeBatches(influxDbConnection, databaseName, points);
+            } finally {
+                influxDbConnection.close();
             }
-            writeBatches(influxDbConnection, databaseName, points);
         }
 
         /**
@@ -1507,7 +1511,9 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
      */
     protected void writeBatches(InfluxDB influxDbConnection, String dbName, List<Point> points) {
         BatchPoints batchPoints = BatchPoints.database(dbName).build();
-        influxDbConnection.enableBatch(BatchOptions.DEFAULTS);
+        if(!influxDbConnection.isBatchEnabled()){
+            influxDbConnection.enableBatch(BatchOptions.DEFAULTS);
+        }
 
         for (Point point : points) {
             batchPoints.point(point);