You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by lg...@apache.org on 2020/05/28 04:31:22 UTC
[incubator-dolphinscheduler] branch dev-1.3.0 updated: [bug fix]
fix: The workflow is fault-tolerant and 2 task instances are generated
(#2833)
This is an automated email from the ASF dual-hosted git repository.
lgcareer pushed a commit to branch dev-1.3.0
in repository https://gitbox.apache.org/repos/asf/incubator-dolphinscheduler.git
The following commit(s) were added to refs/heads/dev-1.3.0 by this push:
new d67436f [bug fix] fix: The workflow is fault-tolerant and 2 task instances are generated (#2833)
d67436f is described below
commit d67436ffad7afe4985d843bc63cab260667ed7d7
Author: bao liang <29...@users.noreply.github.com>
AuthorDate: Thu May 28 12:31:12 2020 +0800
[bug fix] fix: The workflow is fault-tolerant and 2 task instances are generated (#2833)
* feature: add number configuration for master dispatch tasks
* fix bug(#2762): the master would be blocked when the worker group does not exist
* fix bug(#2762): the master would be blocked when the worker group does not exist
* fix ut
* fix ut
* fix bug(2781): cannot pause work flow when task state is "submit success"
* fix code smell
* add blank check for the MySQL "other" params
* test
* update comments
* update comments
* add ut
* fix bug: after the worker service is restarted, tasks that were previously submitted successfully are not executed
* update comments
* add sleep
* add null pointer check
* fix bug: after the master is fault-tolerant, it cannot resume operation
* fix bug: do not fail over a process whose host is 'NULL'
* fix bug: worker failover error.
Co-authored-by: baoliang <ba...@analysys.com.cn>
---
.../server/worker/processor/TaskCallbackService.java | 10 +++++++---
.../org/apache/dolphinscheduler/server/zk/ZKMasterClient.java | 2 +-
.../apache/dolphinscheduler/service/zk/AbstractZKClient.java | 2 +-
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackService.java b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackService.java
index 1e8bf9d..1731407 100644
--- a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackService.java
+++ b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackService.java
@@ -95,14 +95,18 @@ public class TaskCallbackService {
if(newChannel != null){
return getRemoteChannel(newChannel, nettyRemoteChannel.getOpaque(), taskInstanceId);
}
- logger.warn("original master : {} is not reachable, random select master", nettyRemoteChannel.getHost());
+ logger.warn("original master : {} for task : {} is not reachable, random select master",
+ nettyRemoteChannel.getHost(),
+ taskInstanceId);
Set<String> masterNodes = null;
while (Stopper.isRunning()) {
masterNodes = zookeeperRegistryCenter.getMasterNodesDirectly();
if (CollectionUtils.isEmpty(masterNodes)) {
- logger.error("no available master node");
ThreadUtils.sleep(SLEEP_TIME_MILLIS);
}else {
+ logger.error("find {} masters for task : {}.",
+ masterNodes.size(),
+ taskInstanceId);
break;
}
}
@@ -112,7 +116,7 @@ public class TaskCallbackService {
return getRemoteChannel(newChannel, nettyRemoteChannel.getOpaque(), taskInstanceId);
}
}
- throw new IllegalStateException(String.format("all available master nodes : %s are not reachable", masterNodes));
+ throw new IllegalStateException(String.format("all available master nodes : %s are not reachable for task: {}", masterNodes, taskInstanceId));
}
private NettyRemoteChannel getRemoteChannel(Channel newChannel, long opaque, int taskInstanceId){
diff --git a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java
index 1b807a7..686d73d 100644
--- a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java
+++ b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java
@@ -262,7 +262,7 @@ public class ZKMasterClient extends AbstractZKClient {
Date workerServerStartDate = null;
List<Server> workerServers = getServersList(ZKNodeType.WORKER);
for(Server workerServer : workerServers){
- if(workerServer.getHost().equals(taskInstance.getHost())){
+ if(taskInstance.getHost().equals(workerServer.getHost() + Constants.COLON + workerServer.getPort())){
workerServerStartDate = workerServer.getCreateTime();
break;
}
diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java
index 2960969..1cc4db6 100644
--- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java
+++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java
@@ -187,7 +187,7 @@ public abstract class AbstractZKClient extends ZookeeperCachedOperator {
}
Map<String, String> serverMaps = getServerMaps(zkNodeType);
for(String hostKey : serverMaps.keySet()){
- if(hostKey.startsWith(host)){
+ if(hostKey.contains(host)){
return true;
}
}