You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2019/04/15 23:35:33 UTC
[incubator-pinot] branch master updated: Set Helix flapping window
for all components (#4105)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 754a028 Set Helix flapping window for all components (#4105)
754a028 is described below
commit 754a028f7bc22f0dfe68164ac300f31fdb946bc7
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Mon Apr 15 16:35:29 2019 -0700
Set Helix flapping window for all components (#4105)
Helix will disconnect the manager and disable the instance if it
detects flapping (too-frequent disconnects from ZooKeeper). Setting
the flapping time window to a small value prevents this from
happening. Helix ignores non-positive values, so the default
value is set to 1.
Move the config into CommonConstants
- For server, keep the old behavior
- For broker, change default value from 0 (not supported) to 1
- For controller and minion, add this system property
---
.../broker/helix/DefaultHelixBrokerConfig.java | 5 ---
.../broker/broker/helix/HelixBrokerStarter.java | 11 +++---
.../apache/pinot/common/utils/CommonConstants.java | 27 ++++++++------
.../apache/pinot/controller/ControllerStarter.java | 42 ++++++++++++++--------
.../org/apache/pinot/minion/MinionStarter.java | 11 ++++++
.../helix/DefaultHelixStarterServerConfig.java | 3 --
.../server/starter/helix/HelixServerStarter.java | 28 +++++++--------
7 files changed, 74 insertions(+), 53 deletions(-)
diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java
index c264598..e7ab072 100644
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java
+++ b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/DefaultHelixBrokerConfig.java
@@ -24,8 +24,6 @@ import org.apache.commons.configuration.PropertiesConfiguration;
public class DefaultHelixBrokerConfig {
- public static final String HELIX_FLAPPING_TIME_WINDOW_NAME = "pinot.broker.helix.flappingTimeWindowMs";
- public static final String DEFAULT_HELIX_FLAPPING_TIMEIWINDWOW_MS = "0";
public static Configuration getDefaultBrokerConf() {
Configuration brokerConf = new PropertiesConfiguration();
@@ -40,9 +38,6 @@ public class DefaultHelixBrokerConfig {
//client properties
brokerConf.addProperty("pinot.broker.client.queryPort", "8099");
- // [PINOT-2435] setting to 0 so it doesn't disconnect from zk
- brokerConf.addProperty("pinot.broker.helix.flappingTimeWindowMs", "0");
-
return brokerConf;
}
diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java
index 337a6c3..081d891 100644
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java
+++ b/pinot-broker/src/main/java/org/apache/pinot/broker/broker/helix/HelixBrokerStarter.java
@@ -34,6 +34,7 @@ import org.apache.helix.HelixDataAccessor;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixManagerFactory;
import org.apache.helix.InstanceType;
+import org.apache.helix.SystemPropertyKeys;
import org.apache.helix.ZNRecord;
import org.apache.helix.model.InstanceConfig;
import org.apache.helix.model.Message;
@@ -185,10 +186,12 @@ public class HelixBrokerStarter {
}
private void setupHelixSystemProperties() {
- final String helixFlappingTimeWindowPropName = "helixmanager.flappingTimeWindow";
- System.setProperty(helixFlappingTimeWindowPropName, _pinotHelixProperties
- .getString(DefaultHelixBrokerConfig.HELIX_FLAPPING_TIME_WINDOW_NAME,
- DefaultHelixBrokerConfig.DEFAULT_HELIX_FLAPPING_TIMEIWINDWOW_MS));
+ // NOTE: Helix will disconnect the manager and disable the instance if it detects flapping (too frequent disconnect
+ // from ZooKeeper). Setting flapping time window to a small value can avoid this from happening. Helix ignores the
+ // non-positive value, so set the default value as 1.
+ System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, _pinotHelixProperties
+ .getString(CommonConstants.Helix.CONFIG_OF_BROKER_FLAPPING_TIME_WINDOW_MS,
+ CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
}
private void addInstanceTagIfNeeded(String clusterName, String instanceName) {
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
index 137288c..62ed53f 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
@@ -98,13 +98,14 @@ public class CommonConstants {
public static final int DEFAULT_BROKER_QUERY_PORT = 8099;
public static final String KEY_OF_SERVER_NETTY_HOST = "pinot.server.netty.host";
- public static final String HELIX_MANAGER_FLAPPING_TIME_WINDOW_KEY = "helixmanager.flappingTimeWindow";
- public static final String HELIX_MANAGER_MAX_DISCONNECT_THRESHOLD_KEY = "helixmanager.maxDisconnectThreshold";
- public static final String CONFIG_OF_HELIX_FLAPPING_TIMEWINDOW_MS = "pinot.server.flapping.timeWindowMs";
- public static final String CONFIG_OF_HELIX_MAX_DISCONNECT_THRESHOLD =
- "pinot.server.flapping.maxDisconnectThreshold";
- public static final String DEFAULT_HELIX_FLAPPING_TIMEWINDOW_MS = "1";
- public static final String DEFAULT_HELIX_FLAPPING_MAX_DISCONNECT_THRESHOLD = "100";
+ // NOTE: Helix will disconnect the manager and disable the instance if it detects flapping (too frequent disconnect
+ // from ZooKeeper). Setting flapping time window to a small value can avoid this from happening. Helix ignores the
+ // non-positive value, so set the default value as 1.
+ public static final String CONFIG_OF_CONTROLLER_FLAPPING_TIME_WINDOW_MS = "pinot.controller.flapping.timeWindowMs";
+ public static final String CONFIG_OF_BROKER_FLAPPING_TIME_WINDOW_MS = "pinot.broker.flapping.timeWindowMs";
+ public static final String CONFIG_OF_SERVER_FLAPPING_TIME_WINDOW_MS = "pinot.server.flapping.timeWindowMs";
+ public static final String CONFIG_OF_MINION_FLAPPING_TIME_WINDOW_MS = "pinot.minion.flapping.timeWindowMs";
+ public static final String DEFAULT_FLAPPING_TIME_WINDOW_MS = "1";
}
public static class Broker {
@@ -112,7 +113,8 @@ public class CommonConstants {
public static final int DEFAULT_BROKER_QUERY_RESPONSE_LIMIT = Integer.MAX_VALUE;
public static final String CONFIG_OF_BROKER_QUERY_LOG_LENGTH = "pinot.broker.query.log.length";
public static final int DEFAULT_BROKER_QUERY_LOG_LENGTH = Integer.MAX_VALUE;
- public static final String CONFIG_OF_BROKER_QUERY_LOG_MAX_RATE_PER_SECOND = "pinot.broker.query.log.maxRatePerSecond";
+ public static final String CONFIG_OF_BROKER_QUERY_LOG_MAX_RATE_PER_SECOND =
+ "pinot.broker.query.log.maxRatePerSecond";
public static final double DEFAULT_BROKER_QUERY_LOG_MAX_RATE_PER_SECOND = 10_000d;
public static final String CONFIG_OF_BROKER_TIMEOUT_MS = "pinot.broker.timeoutMs";
public static final long DEFAULT_BROKER_TIMEOUT_MS = 10_000L;
@@ -127,7 +129,8 @@ public class CommonConstants {
// Configuration to consider the broker ServiceStatus as being STARTED if the percent of resources (tables) that
// are ONLINE for this this broker has crossed the threshold percentage of the total number of tables
// that it is expected to serve.
- public static final String CONFIG_OF_BROKER_MIN_RESOURCE_PERCENT_FOR_START = "pinot.broker.startup.minResourcePercent";
+ public static final String CONFIG_OF_BROKER_MIN_RESOURCE_PERCENT_FOR_START =
+ "pinot.broker.startup.minResourcePercent";
public static final double DEFAULT_BROKER_MIN_RESOURCE_PERCENT_FOR_START = 100.0;
public static class Request {
@@ -162,7 +165,8 @@ public class CommonConstants {
public static final String CONFIG_OF_ENABLE_DEFAULT_COLUMNS = "pinot.server.instance.enable.default.columns";
public static final String CONFIG_OF_ENABLE_SHUTDOWN_DELAY = "pinot.server.instance.enable.shutdown.delay";
public static final String CONFIG_OF_ENABLE_SPLIT_COMMIT = "pinot.server.instance.enable.split.commit";
- public static final String CONFIG_OF_ENABLE_COMMIT_END_WITH_METADATA = "pinot.server.instance.enable.commitend.metadata";
+ public static final String CONFIG_OF_ENABLE_COMMIT_END_WITH_METADATA =
+ "pinot.server.instance.enable.commitend.metadata";
public static final String CONFIG_OF_REALTIME_OFFHEAP_ALLOCATION = "pinot.server.instance.realtime.alloc.offheap";
public static final String CONFIG_OF_REALTIME_OFFHEAP_DIRECT_ALLOCATION =
"pinot.server.instance.realtime.alloc.offheap.direct";
@@ -175,7 +179,8 @@ public class CommonConstants {
// Configuration to consider the server ServiceStatus as being STARTED if the percent of resources (tables) that
// are ONLINE for this this server has crossed the threshold percentage of the total number of tables
// that it is expected to serve.
- public static final String CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START = "pinot.server.startup.minResourcePercent";
+ public static final String CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START =
+ "pinot.server.startup.minResourcePercent";
public static final double DEFAULT_SERVER_MIN_RESOURCE_PERCENT_FOR_START = 100.0;
public static final int DEFAULT_ADMIN_API_PORT = 8097;
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java b/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java
index 10c1c00..9e8a699 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/ControllerStarter.java
@@ -36,6 +36,7 @@ import org.apache.commons.httpclient.HttpConnectionManager;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.io.FileUtils;
import org.apache.helix.HelixManager;
+import org.apache.helix.SystemPropertyKeys;
import org.apache.helix.task.TaskDriver;
import org.apache.pinot.common.Utils;
import org.apache.pinot.common.metrics.ControllerMeter;
@@ -112,6 +113,8 @@ public class ControllerStarter {
public ControllerStarter(ControllerConf conf) {
_config = conf;
+ setupHelixSystemProperties();
+
_controllerMode = conf.getControllerMode();
// Helix related settings.
_helixZkURL = HelixConfig.getAbsoluteZkPathForHelix(_config.getZkStr());
@@ -137,6 +140,15 @@ public class ControllerStarter {
}
}
+ private void setupHelixSystemProperties() {
+ // NOTE: Helix will disconnect the manager and disable the instance if it detects flapping (too frequent disconnect
+ // from ZooKeeper). Setting flapping time window to a small value can avoid this from happening. Helix ignores the
+ // non-positive value, so set the default value as 1.
+ System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, _config
+ .getString(CommonConstants.Helix.CONFIG_OF_CONTROLLER_FLAPPING_TIME_WINDOW_MS,
+ CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
+ }
+
public PinotHelixResourceManager getHelixResourceManager() {
return _helixResourceManager;
}
@@ -191,7 +203,8 @@ public class ControllerStarter {
LOGGER.error("Invalid mode: " + _controllerMode);
}
- ServiceStatus.setServiceStatusCallback(new ServiceStatus.MultipleCallbackServiceStatusCallback(_serviceStatusCallbackList));
+ ServiceStatus
+ .setServiceStatusCallback(new ServiceStatus.MultipleCallbackServiceStatusCallback(_serviceStatusCallbackList));
_controllerMetrics.initializeGlobalMeters();
}
@@ -241,7 +254,8 @@ public class ControllerStarter {
// Helix resource manager must be started in order to create PinotLLCRealtimeSegmentManager
LOGGER.info("Starting realtime segment manager");
- PinotLLCRealtimeSegmentManager.create(_helixResourceManager, _config, _controllerMetrics, _controllerLeadershipManager);
+ PinotLLCRealtimeSegmentManager
+ .create(_helixResourceManager, _config, _controllerMetrics, _controllerLeadershipManager);
_realtimeSegmentsManager = new PinotRealtimeSegmentManager(_helixResourceManager, _controllerLeadershipManager);
_realtimeSegmentsManager.start(_controllerMetrics);
@@ -414,18 +428,18 @@ public class ControllerStarter {
}
public void stop() {
- switch (_controllerMode) {
- case DUAL:
- stopPinotController();
- stopHelixController();
- break;
- case PINOT_ONLY:
- stopPinotController();
- break;
- case HELIX_ONLY:
- stopHelixController();
- break;
- }
+ switch (_controllerMode) {
+ case DUAL:
+ stopPinotController();
+ stopHelixController();
+ break;
+ case PINOT_ONLY:
+ stopPinotController();
+ break;
+ case HELIX_ONLY:
+ stopHelixController();
+ break;
+ }
}
private void stopHelixController() {
diff --git a/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java b/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java
index 2d17eac..2139fac 100644
--- a/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java
+++ b/pinot-minion/src/main/java/org/apache/pinot/minion/MinionStarter.java
@@ -28,6 +28,7 @@ import org.apache.commons.configuration.Configuration;
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixManager;
import org.apache.helix.InstanceType;
+import org.apache.helix.SystemPropertyKeys;
import org.apache.helix.manager.zk.ZKHelixManager;
import org.apache.helix.model.InstanceConfig;
import org.apache.helix.task.TaskStateModelFactory;
@@ -77,11 +78,21 @@ public class MinionStarter {
_instanceId = config.getString(CommonConstants.Helix.Instance.INSTANCE_ID_KEY,
CommonConstants.Minion.INSTANCE_PREFIX + NetUtil.getHostAddress() + "_"
+ CommonConstants.Minion.DEFAULT_HELIX_PORT);
+ setupHelixSystemProperties();
_helixManager = new ZKHelixManager(_helixClusterName, _instanceId, InstanceType.PARTICIPANT, zkAddress);
_taskExecutorFactoryRegistry = new TaskExecutorFactoryRegistry();
_eventObserverFactoryRegistry = new EventObserverFactoryRegistry();
}
+ private void setupHelixSystemProperties() {
+ // NOTE: Helix will disconnect the manager and disable the instance if it detects flapping (too frequent disconnect
+ // from ZooKeeper). Setting flapping time window to a small value can avoid this from happening. Helix ignores the
+ // non-positive value, so set the default value as 1.
+ System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, _config
+ .getString(CommonConstants.Helix.CONFIG_OF_MINION_FLAPPING_TIME_WINDOW_MS,
+ CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
+ }
+
/**
* Registers a task executor factory.
* <p>This is for pluggable task executor factories.
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java
index 46f29d5..aa50ca6 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/DefaultHelixStarterServerConfig.java
@@ -64,9 +64,6 @@ public class DefaultHelixStarterServerConfig {
serverConf.addProperty("pinot.server.query.executor.pruner.ValidSegmentPruner.id", "2");
serverConf.addProperty("pinot.server.query.executor.pruner.PartitionSegmentPruner.id", "3");
- serverConf.addProperty(CommonConstants.Helix.CONFIG_OF_HELIX_FLAPPING_TIMEWINDOW_MS,
- CommonConstants.Helix.DEFAULT_HELIX_FLAPPING_TIMEWINDOW_MS);
-
// request handler factory parameters
serverConf.addProperty(CommonConstants.Server.CONFIG_OF_REQUEST_HANDLER_FACTORY_CLASS,
CommonConstants.Server.DEFAULT_REQUEST_HANDLER_FACTORY_CLASS);
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java
index fa491d9..64f7b32 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixServerStarter.java
@@ -38,6 +38,7 @@ import org.apache.helix.HelixManagerFactory;
import org.apache.helix.InstanceType;
import org.apache.helix.PropertyKey;
import org.apache.helix.PropertyKey.Builder;
+import org.apache.helix.SystemPropertyKeys;
import org.apache.helix.ZNRecord;
import org.apache.helix.model.CurrentState;
import org.apache.helix.model.ExternalView;
@@ -129,7 +130,7 @@ public class HelixServerStarter {
}
LOGGER.info("Connecting Helix components");
- setupHelixSystemProperties(_helixServerConfig);
+ setupHelixSystemProperties();
// Replace all white-spaces from list of zkServers.
_zkServers = zkServer.replaceAll("\\s+", "");
_helixManager =
@@ -178,8 +179,9 @@ public class HelixServerStarter {
.addPreConnectCallback(() -> serverMetrics.addMeteredGlobalValue(ServerMeter.HELIX_ZOOKEEPER_RECONNECTS, 1L));
// Register the service status handler
- final double minResourcePercentForStartup = _helixServerConfig.getDouble(CommonConstants.Server.CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START,
- CommonConstants.Server.DEFAULT_SERVER_MIN_RESOURCE_PERCENT_FOR_START);
+ double minResourcePercentForStartup = _helixServerConfig
+ .getDouble(CommonConstants.Server.CONFIG_OF_SERVER_MIN_RESOURCE_PERCENT_FOR_START,
+ CommonConstants.Server.DEFAULT_SERVER_MIN_RESOURCE_PERCENT_FOR_START);
ServiceStatus.setServiceStatusCallback(new ServiceStatus.MultipleCallbackServiceStatusCallback(ImmutableList
.of(new ServiceStatus.IdealStateAndCurrentStateMatchServiceStatusCallback(_helixManager, _helixClusterName,
_instanceId, minResourcePercentForStartup),
@@ -330,19 +332,13 @@ public class HelixServerStarter {
}
}
- private void setupHelixSystemProperties(Configuration conf) {
- // [PINOT-2435] [PINOT-3927] Disable helix detection of flapping connection
- // Helix will shutdown and effectively remove the instance from cluster if
- // it detects flapping while the process continues to run
- // Helix ignores the value if it is <= 0. Hence, setting time window to small value
- // and number of connection failures within that window to high value
- System.setProperty(CommonConstants.Helix.HELIX_MANAGER_FLAPPING_TIME_WINDOW_KEY,
- conf.getString(CommonConstants.Helix.CONFIG_OF_HELIX_FLAPPING_TIMEWINDOW_MS,
- CommonConstants.Helix.DEFAULT_HELIX_FLAPPING_TIMEWINDOW_MS));
-
- System.setProperty(CommonConstants.Helix.HELIX_MANAGER_MAX_DISCONNECT_THRESHOLD_KEY,
- conf.getString(CommonConstants.Helix.CONFIG_OF_HELIX_MAX_DISCONNECT_THRESHOLD,
- CommonConstants.Helix.DEFAULT_HELIX_FLAPPING_MAX_DISCONNECT_THRESHOLD));
+ private void setupHelixSystemProperties() {
+ // NOTE: Helix will disconnect the manager and disable the instance if it detects flapping (too frequent disconnect
+ // from ZooKeeper). Setting flapping time window to a small value can avoid this from happening. Helix ignores the
+ // non-positive value, so set the default value as 1.
+ System.setProperty(SystemPropertyKeys.FLAPPING_TIME_WINDOW, _helixServerConfig
+ .getString(CommonConstants.Helix.CONFIG_OF_SERVER_FLAPPING_TIME_WINDOW_MS,
+ CommonConstants.Helix.DEFAULT_FLAPPING_TIME_WINDOW_MS));
}
public void stop() {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org