You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by pe...@apache.org on 2021/02/18 15:57:09 UTC
[pulsar] 15/27: Fix testBrokerSelectionForAntiAffinityGroup by
increasing OverloadedThreshold (#9393)
This is an automated email from the ASF dual-hosted git repository.
penghui pushed a commit to branch branch-2.7
in repository https://gitbox.apache.org/repos/asf/pulsar.git
commit 05d414d642ccdec029f4deee8aafa2dbac4e4de8
Author: Michael Marshall <47...@users.noreply.github.com>
AuthorDate: Wed Feb 10 18:45:20 2021 -0700
Fix testBrokerSelectionForAntiAffinityGroup by increasing OverloadedThreshold (#9393)
Fixes: #6368
Flaky-test: `AntiAffinityNamespaceGroupTest.testBrokerSelectionForAntiAffinityGroup`
TestClass is flaky. The testMethod test method fails sporadically.
See #6368 for example failures as well as for my analysis and detailed justification for this change.
In short, by setting the `LoadBalancerBrokerOverloadedThresholdPercentage` to `100`, we remove the main edge case that allows two namespaces in the same anti-affinity group to get placed on the same broker.
Note that I am assuming the following method will never return a value greater than 1, which could lead to test failure. https://github.com/apache/pulsar/blob/85f3ff4edbaa10c7894af8ad823cbce37b13829c/pulsar-common/src/main/java/org/apache/pulsar/policies/data/loadbalancer/LocalBrokerData.java#L214-L217
(cherry picked from commit 610b17d6433b0894a541421841b525741f8d5d40)
---
.../loadbalance/impl/GenericBrokerHostUsageImpl.java | 19 ++++++++++++-------
.../loadbalance/impl/LinuxBrokerHostUsageImpl.java | 7 ++++---
.../loadbalance/AntiAffinityNamespaceGroupTest.java | 11 +++++++++++
3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/GenericBrokerHostUsageImpl.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/GenericBrokerHostUsageImpl.java
index 9d3de7b..14c670c 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/GenericBrokerHostUsageImpl.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/GenericBrokerHostUsageImpl.java
@@ -52,8 +52,13 @@ public class GenericBrokerHostUsageImpl implements BrokerHostUsage {
this.systemBean = (OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean();
this.usage = new SystemResourceUsage();
this.totalCpuLimit = getTotalCpuLimit();
- executorService.scheduleAtFixedRate(this::checkCpuLoad, 0, CPU_CHECK_MILLIS, TimeUnit.MILLISECONDS);
- executorService.scheduleAtFixedRate(this::doCalculateBrokerHostUsage, 0, hostUsageCheckIntervalMin, TimeUnit.MINUTES);
+
+ // Call now to initialize values before the constructor returns
+ calculateBrokerHostUsage();
+ executorService.scheduleAtFixedRate(this::checkCpuLoad, CPU_CHECK_MILLIS,
+ CPU_CHECK_MILLIS, TimeUnit.MILLISECONDS);
+ executorService.scheduleAtFixedRate(this::doCalculateBrokerHostUsage, hostUsageCheckIntervalMin,
+ hostUsageCheckIntervalMin, TimeUnit.MINUTES);
}
@Override
@@ -61,7 +66,7 @@ public class GenericBrokerHostUsageImpl implements BrokerHostUsage {
return usage;
}
- private void checkCpuLoad() {
+ private synchronized void checkCpuLoad() {
cpuUsageSum += systemBean.getSystemCpuLoad();
cpuUsageCount++;
}
@@ -84,7 +89,10 @@ public class GenericBrokerHostUsageImpl implements BrokerHostUsage {
return 100 * Runtime.getRuntime().availableProcessors();
}
- private double getTotalCpuUsage() {
+ private synchronized double getTotalCpuUsage() {
+ if (cpuUsageCount == 0) {
+ return 0;
+ }
double cpuUsage = cpuUsageSum / cpuUsageCount;
cpuUsageSum = 0d;
cpuUsageCount = 0;
@@ -92,9 +100,6 @@ public class GenericBrokerHostUsageImpl implements BrokerHostUsage {
}
private ResourceUsage getCpuUsage() {
- if (cpuUsageCount == 0) {
- return new ResourceUsage(0, totalCpuLimit);
- }
return new ResourceUsage(getTotalCpuUsage() * totalCpuLimit, totalCpuLimit);
}
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java
index 992b743..e18ff91 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java
@@ -78,8 +78,6 @@ public class LinuxBrokerHostUsageImpl implements BrokerHostUsage {
this.lastCollection = 0L;
this.usage = new SystemResourceUsage();
this.overrideBrokerNicSpeedGbps = overrideBrokerNicSpeedGbps;
- executorService.scheduleAtFixedRate(this::calculateBrokerHostUsage, 0,
- hostUsageCheckIntervalMin, TimeUnit.MINUTES);
boolean isCGroupsEnabled = false;
try {
@@ -87,9 +85,12 @@ public class LinuxBrokerHostUsageImpl implements BrokerHostUsage {
} catch (Exception e) {
log.warn("Failed to check cgroup CPU usage file: {}", e.getMessage());
}
-
this.isCGroupsEnabled = isCGroupsEnabled;
+
+ // Call now to initialize values before the constructor returns
calculateBrokerHostUsage();
+ executorService.scheduleAtFixedRate(this::calculateBrokerHostUsage, hostUsageCheckIntervalMin,
+ hostUsageCheckIntervalMin, TimeUnit.MINUTES);
}
@Override
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/AntiAffinityNamespaceGroupTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/AntiAffinityNamespaceGroupTest.java
index 6551172..2286477 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/AntiAffinityNamespaceGroupTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/AntiAffinityNamespaceGroupTest.java
@@ -113,6 +113,8 @@ public class AntiAffinityNamespaceGroupTest {
config1.setFailureDomainsEnabled(true);
config1.setLoadBalancerEnabled(true);
config1.setAdvertisedAddress("localhost");
+ // Don't want overloaded threshold to affect namespace placement
+ config1.setLoadBalancerBrokerOverloadedThresholdPercentage(400);
createCluster(bkEnsemble.getZkClient(), config1);
pulsar1 = new PulsarService(config1);
pulsar1.start();
@@ -130,6 +132,8 @@ public class AntiAffinityNamespaceGroupTest {
config2.setBrokerServicePort(Optional.of(0));
config2.setFailureDomainsEnabled(true);
config2.setAdvertisedAddress("localhost");
+ // Don't want overloaded threshold to affect namespace placement
+ config2.setLoadBalancerBrokerOverloadedThresholdPercentage(400);
pulsar2 = new PulsarService(config2);
pulsar2.start();
@@ -366,6 +370,13 @@ public class AntiAffinityNamespaceGroupTest {
/**
* It verifies anti-affinity with failure domain enabled with 2 brokers.
*
+ * Note: in this class's setup method, the LoadBalancerBrokerOverloadedThresholdPercentage
+ * is set to 400 to ensure that the overloaded logic doesn't affect the broker selection.
+ * Without that configuration, two namespaces in the same anti-affinity group could
+ * be placed on the same broker. The CPU usage can be over 100%, and if we run with more
+ * than 4 cores, it could conceivably be above 400%. If this test becomes flaky again,
+ * look at the logs to see if there is a mention of an overloaded broker.
+ *
* <pre>
* 1. Register brokers to domain: domain-1: broker1 & domain-2: broker2
* 2. Load-Manager receives a watch and updates brokerToDomain cache with new domain data