You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by lg...@apache.org on 2020/05/27 03:33:24 UTC

[incubator-dolphinscheduler] branch dev-1.3.0 updated: [bug fix]fix bug: After the master is fault-tolerant, it cannot resume operation (#2813)

This is an automated email from the ASF dual-hosted git repository.

lgcareer pushed a commit to branch dev-1.3.0
in repository https://gitbox.apache.org/repos/asf/incubator-dolphinscheduler.git


The following commit(s) were added to refs/heads/dev-1.3.0 by this push:
     new 1caac70  [bug fix]fix bug: After the master is fault-tolerant, it cannot resume operation  (#2813)
1caac70 is described below

commit 1caac70215ee62a3bcd984ad05ce4a996f85dd24
Author: bao liang <29...@users.noreply.github.com>
AuthorDate: Wed May 27 11:33:10 2020 +0800

    [bug fix]fix bug: After the master is fault-tolerant, it cannot resume operation  (#2813)
    
    * feature: add number configuration for master dispatch tasks
    
    * fix bug(#2762) the master would be blocked when worker group not exists
    
    * fix bug(#2762) the master would be blocked when worker group not exists
    
    * fix ut
    
    * fix ut
    
    * fix bug(2781): cannot pause work flow when task state is "submit success"
    
    * fix code smell
    
    * add mysql other param blank judge
    
    * test
    
    * update comments
    
    * update comments
    
    * add ut
    
    * fix bug: Restart the worker service again, the previously submitted successful tasks are not executed
    
    * update comments
    
    * add sleep
    
    * add null point check
    
    * fix bug:After the master is fault-tolerant, it cannot resume operation
    
    * fix bug: do not failover the host is 'NULL' process
    
    Co-authored-by: baoliang <ba...@analysys.com.cn>
---
 .../java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java  | 3 +++
 .../org/apache/dolphinscheduler/service/process/ProcessService.java | 6 +-----
 .../org/apache/dolphinscheduler/service/zk/AbstractZKClient.java    | 6 +++++-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java
index 69aecee..1b807a7 100644
--- a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java
+++ b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java
@@ -335,6 +335,9 @@ public class ZKMasterClient extends AbstractZKClient {
 
 		//updateProcessInstance host is null and insert into command
 		for(ProcessInstance processInstance : needFailoverProcessInstanceList){
+			if(Constants.NULL.equals(processInstance.getHost()) ){
+			    continue;
+			}
 			processService.processNeedFailoverProcessInstances(processInstance);
 		}
 
diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java
index 36e2b60..0bab35a 100644
--- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java
+++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java
@@ -121,10 +121,6 @@ public class ProcessService {
             logger.info("there is not enough thread for this command: {}", command);
             return setWaitingThreadProcess(command, processInstance);
         }
-        if (processInstance.getCommandType().equals(CommandType.RECOVER_TOLERANCE_FAULT_PROCESS)){
-            delCommandByid(command.getId());
-            return null;
-        }
         processInstance.setCommandType(command.getCommandType());
         processInstance.addHistoryCmd(command.getCommandType());
         saveProcessInstance(processInstance);
@@ -1484,7 +1480,7 @@ public class ProcessService {
     @Transactional(rollbackFor = Exception.class)
     public void processNeedFailoverProcessInstances(ProcessInstance processInstance){
         //1 update processInstance host is null
-        processInstance.setHost("null");
+        processInstance.setHost(Constants.NULL);
         processInstanceMapper.updateById(processInstance);
 
         //2 insert into recover command
diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java
index acbbe76..2960969 100644
--- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java
+++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java
@@ -125,12 +125,16 @@ public abstract class AbstractZKClient extends ZookeeperCachedOperator {
 		List<Server> masterServers = new ArrayList<>();
 		for (Map.Entry<String, String> entry : masterMap.entrySet()) {
 			Server masterServer = ResInfo.parseHeartbeatForZKInfo(entry.getValue());
+			if(masterServer == null){
+				continue;
+			}
 			String key = entry.getKey();
 			masterServer.setZkDirectory(parentPath + "/"+ key);
 			//set host and port
 			String[] hostAndPort=key.split(COLON);
 			String[] hosts=hostAndPort[0].split(DIVISION_STRING);
-			masterServer.setHost(hosts[hosts.length-1]);// fetch the last one
+			// fetch the last one
+			masterServer.setHost(hosts[hosts.length-1]);
 			masterServer.setPort(Integer.parseInt(hostAndPort[1]));
 			masterServers.add(masterServer);
 		}