You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2015/09/11 07:37:45 UTC

tajo git commit: TAJO-1830: Fix race condition in HdfsServiceTracker.

Repository: tajo
Updated Branches:
  refs/heads/master cc6917804 -> cea832aca


TAJO-1830: Fix race condition in HdfsServiceTracker.

Closes #748


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/cea832ac
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/cea832ac
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/cea832ac

Branch: refs/heads/master
Commit: cea832acaf36398ddf32392d7b13493911ba6014
Parents: cc69178
Author: Jinho Kim <jh...@apache.org>
Authored: Fri Sep 11 14:36:31 2015 +0900
Committer: Jinho Kim <jh...@apache.org>
Committed: Fri Sep 11 14:36:31 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  2 +
 .../apache/tajo/ha/TestHAServiceHDFSImpl.java   |  6 +--
 .../org/apache/tajo/ha/HdfsServiceTracker.java  | 51 +++++++++++---------
 3 files changed, 34 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 771c1bd..04adaa5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -259,6 +259,8 @@ Release 0.11.0 - unreleased
 
   BUG FIXES
 
+    TAJO-1830: Fix race condition in HdfsServiceTracker. (jinho)
+
     TAJO-1727: Avoid to create external table using TableSpace. (jaehwa)
 
     TAJO-1600: Invalid query planning for distinct group-by. (hyunsik)

http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java b/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
index 265f075..f0f01bf 100644
--- a/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
@@ -81,11 +81,11 @@ public class TestHAServiceHDFSImpl  {
 
       verifyDataBaseAndTable(tracker);
 
-      assertEquals(2, fs.listStatus(activePath).length);
-      assertEquals(0, fs.listStatus(backupPath).length);
-
       assertTrue(fs.exists(new Path(activePath, HAConstants.ACTIVE_LOCK_FILE)));
       assertTrue(fs.exists(new Path(activePath, backupMaster.getMasterName().replaceAll(":", "_"))));
+
+      assertEquals(2, fs.listStatus(activePath).length);
+      assertEquals(0, fs.listStatus(backupPath).length);
     } finally {
       backupMaster.stop();
     }

http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java b/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
index d0eb985..4b97fe6 100644
--- a/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
+++ b/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
@@ -32,13 +32,11 @@ import org.apache.tajo.master.TajoMaster;
 import org.apache.tajo.service.HAServiceTracker;
 import org.apache.tajo.service.ServiceTrackerException;
 import org.apache.tajo.service.TajoMasterInfo;
-import org.apache.tajo.util.*;
 import org.apache.tajo.util.FileUtil;
+import org.apache.tajo.util.TUtil;
 
-import javax.net.SocketFactory;
 import java.io.IOException;
 import java.net.InetSocketAddress;
-import java.net.Socket;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -191,7 +189,10 @@ public class HdfsServiceTracker extends HAServiceTracker {
         }
       }
     }
-    startPingChecker();
+
+    if(!isActiveMaster()) {
+      startPingChecker();
+    }
   }
 
   /**
@@ -219,7 +220,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
     Path lockFile = new Path(activePath, HAConstants.ACTIVE_LOCK_FILE);
     try {
       lockOutput = fs.create(lockFile, false);
-      lockOutput.hsync();
       lockOutput.close();
       fs.deleteOnExit(lockFile);
       result = true;
@@ -241,7 +241,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
       out = fs.create(path, false);
 
       out.writeUTF(sb.toString());
-      out.hsync();
       out.close();
 
       fs.deleteOnExit(path);
@@ -283,7 +282,9 @@ public class HdfsServiceTracker extends HAServiceTracker {
   }
 
   @Override
-  public void delete() throws IOException {
+  public synchronized void delete() throws IOException {
+    stopped = true;
+
     if (ShutdownHookManager.get().isShutdownInProgress()) return;
 
     String fileName = masterName.replaceAll(":", "_");
@@ -291,8 +292,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
     fs.delete(new Path(activePath, fileName), false);
     fs.delete(new Path(activePath, HAConstants.ACTIVE_LOCK_FILE), false);
     fs.delete(new Path(backupPath, fileName), false);
-
-    stopped = true;
   }
 
   @Override
@@ -366,16 +365,17 @@ public class HdfsServiceTracker extends HAServiceTracker {
 
               // If active master is dead, this master should be active master instead of
               // previous active master.
-              if (!checkConnection(currentActiveMaster)) {
+              if (!stopped && !checkConnection(currentActiveMaster)) {
                 Path activeFile = new Path(activePath, currentActiveMaster.replaceAll(":", "_"));
                 fs.delete(activeFile, false);
+
                 Path lockFile = new Path(activePath, HAConstants.ACTIVE_LOCK_FILE);
                 fs.delete(lockFile, false);
                 register();
               }
             }
           } catch (Exception e) {
-            e.printStackTrace();
+            LOG.error(e.getMessage(), e);
           }
         }
       }
@@ -458,19 +458,34 @@ public class HdfsServiceTracker extends HAServiceTracker {
         throw new ServiceTrackerException("Active master base path must be a directory.");
       }
 
-      FileStatus[] files = fs.listStatus(activeMasterBaseDir);
       /* wait for active master from HDFS */
       int pause = conf.getIntVar(ConfVars.TAJO_MASTER_HA_CLIENT_RETRY_PAUSE_TIME);
       int maxRetry = conf.getIntVar(ConfVars.TAJO_MASTER_HA_CLIENT_RETRY_MAX_NUM);
       int retry = 0;
 
-      while (files.length < 2 && retry < maxRetry) {
+
+      Path activeMasterEntry = null;
+      FileStatus[] files = null;
+
+      loop:while (retry < maxRetry) {
+        files = fs.listStatus(activeMasterBaseDir);
+        for (FileStatus eachFile : files) {
+          //check if active file is written
+          if (!eachFile.getPath().getName().equals(HAConstants.ACTIVE_LOCK_FILE) && eachFile.getLen() > 0) {
+            activeMasterEntry = eachFile.getPath();
+            break loop;
+          }
+        }
+
         try {
           this.wait(pause);
         } catch (InterruptedException e) {
           throw new ServiceTrackerException(e);
         }
-        files = fs.listStatus(activeMasterBaseDir);
+      }
+
+      if (files == null || activeMasterEntry == null) {
+        throw new ServiceTrackerException("Active master entry cannot be found in: " + activeMasterBaseDir);
       }
 
       if (files.length < 1) {
@@ -480,14 +495,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
         throw new ServiceTrackerException("Three or more than active master entries.");
       }
 
-      Path activeMasterEntry = null;
-
-      for (FileStatus eachFile : files) {
-        if (!eachFile.getPath().getName().equals(HAConstants.ACTIVE_LOCK_FILE)) {
-          activeMasterEntry = eachFile.getPath();
-        }
-      }
-
       if (!fs.isFile(activeMasterEntry)) {
         throw new ServiceTrackerException("Active master entry must be a file, but it is a directory.");
       }