You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2015/09/11 07:37:45 UTC
tajo git commit: TAJO-1830: Fix race condition in HdfsServiceTracker.
Repository: tajo
Updated Branches:
refs/heads/master cc6917804 -> cea832aca
TAJO-1830: Fix race condition in HdfsServiceTracker.
Closes #748
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/cea832ac
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/cea832ac
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/cea832ac
Branch: refs/heads/master
Commit: cea832acaf36398ddf32392d7b13493911ba6014
Parents: cc69178
Author: Jinho Kim <jh...@apache.org>
Authored: Fri Sep 11 14:36:31 2015 +0900
Committer: Jinho Kim <jh...@apache.org>
Committed: Fri Sep 11 14:36:31 2015 +0900
----------------------------------------------------------------------
CHANGES | 2 +
.../apache/tajo/ha/TestHAServiceHDFSImpl.java | 6 +--
.../org/apache/tajo/ha/HdfsServiceTracker.java | 51 +++++++++++---------
3 files changed, 34 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 771c1bd..04adaa5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -259,6 +259,8 @@ Release 0.11.0 - unreleased
BUG FIXES
+ TAJO-1830: Fix race condition in HdfsServiceTracker. (jinho)
+
TAJO-1727: Avoid to create external table using TableSpace. (jaehwa)
TAJO-1600: Invalid query planning for distinct group-by. (hyunsik)
http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java b/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
index 265f075..f0f01bf 100644
--- a/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java
@@ -81,11 +81,11 @@ public class TestHAServiceHDFSImpl {
verifyDataBaseAndTable(tracker);
- assertEquals(2, fs.listStatus(activePath).length);
- assertEquals(0, fs.listStatus(backupPath).length);
-
assertTrue(fs.exists(new Path(activePath, HAConstants.ACTIVE_LOCK_FILE)));
assertTrue(fs.exists(new Path(activePath, backupMaster.getMasterName().replaceAll(":", "_"))));
+
+ assertEquals(2, fs.listStatus(activePath).length);
+ assertEquals(0, fs.listStatus(backupPath).length);
} finally {
backupMaster.stop();
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java b/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
index d0eb985..4b97fe6 100644
--- a/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
+++ b/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java
@@ -32,13 +32,11 @@ import org.apache.tajo.master.TajoMaster;
import org.apache.tajo.service.HAServiceTracker;
import org.apache.tajo.service.ServiceTrackerException;
import org.apache.tajo.service.TajoMasterInfo;
-import org.apache.tajo.util.*;
import org.apache.tajo.util.FileUtil;
+import org.apache.tajo.util.TUtil;
-import javax.net.SocketFactory;
import java.io.IOException;
import java.net.InetSocketAddress;
-import java.net.Socket;
import java.util.ArrayList;
import java.util.List;
@@ -191,7 +189,10 @@ public class HdfsServiceTracker extends HAServiceTracker {
}
}
}
- startPingChecker();
+
+ if(!isActiveMaster()) {
+ startPingChecker();
+ }
}
/**
@@ -219,7 +220,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
Path lockFile = new Path(activePath, HAConstants.ACTIVE_LOCK_FILE);
try {
lockOutput = fs.create(lockFile, false);
- lockOutput.hsync();
lockOutput.close();
fs.deleteOnExit(lockFile);
result = true;
@@ -241,7 +241,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
out = fs.create(path, false);
out.writeUTF(sb.toString());
- out.hsync();
out.close();
fs.deleteOnExit(path);
@@ -283,7 +282,9 @@ public class HdfsServiceTracker extends HAServiceTracker {
}
@Override
- public void delete() throws IOException {
+ public synchronized void delete() throws IOException {
+ stopped = true;
+
if (ShutdownHookManager.get().isShutdownInProgress()) return;
String fileName = masterName.replaceAll(":", "_");
@@ -291,8 +292,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
fs.delete(new Path(activePath, fileName), false);
fs.delete(new Path(activePath, HAConstants.ACTIVE_LOCK_FILE), false);
fs.delete(new Path(backupPath, fileName), false);
-
- stopped = true;
}
@Override
@@ -366,16 +365,17 @@ public class HdfsServiceTracker extends HAServiceTracker {
// If active master is dead, this master should be active master instead of
// previous active master.
- if (!checkConnection(currentActiveMaster)) {
+ if (!stopped && !checkConnection(currentActiveMaster)) {
Path activeFile = new Path(activePath, currentActiveMaster.replaceAll(":", "_"));
fs.delete(activeFile, false);
+
Path lockFile = new Path(activePath, HAConstants.ACTIVE_LOCK_FILE);
fs.delete(lockFile, false);
register();
}
}
} catch (Exception e) {
- e.printStackTrace();
+ LOG.error(e.getMessage(), e);
}
}
}
@@ -458,19 +458,34 @@ public class HdfsServiceTracker extends HAServiceTracker {
throw new ServiceTrackerException("Active master base path must be a directory.");
}
- FileStatus[] files = fs.listStatus(activeMasterBaseDir);
/* wait for active master from HDFS */
int pause = conf.getIntVar(ConfVars.TAJO_MASTER_HA_CLIENT_RETRY_PAUSE_TIME);
int maxRetry = conf.getIntVar(ConfVars.TAJO_MASTER_HA_CLIENT_RETRY_MAX_NUM);
int retry = 0;
- while (files.length < 2 && retry < maxRetry) {
+
+ Path activeMasterEntry = null;
+ FileStatus[] files = null;
+
+ loop:while (retry < maxRetry) {
+ files = fs.listStatus(activeMasterBaseDir);
+ for (FileStatus eachFile : files) {
+ //check if active file is written
+ if (!eachFile.getPath().getName().equals(HAConstants.ACTIVE_LOCK_FILE) && eachFile.getLen() > 0) {
+ activeMasterEntry = eachFile.getPath();
+ break loop;
+ }
+ }
+
try {
this.wait(pause);
} catch (InterruptedException e) {
throw new ServiceTrackerException(e);
}
- files = fs.listStatus(activeMasterBaseDir);
+ }
+
+ if (files == null || activeMasterEntry == null) {
+ throw new ServiceTrackerException("Active master entry cannot be found in: " + activeMasterBaseDir);
}
if (files.length < 1) {
@@ -480,14 +495,6 @@ public class HdfsServiceTracker extends HAServiceTracker {
throw new ServiceTrackerException("Three or more than active master entries.");
}
- Path activeMasterEntry = null;
-
- for (FileStatus eachFile : files) {
- if (!eachFile.getPath().getName().equals(HAConstants.ACTIVE_LOCK_FILE)) {
- activeMasterEntry = eachFile.getPath();
- }
- }
-
if (!fs.isFile(activeMasterEntry)) {
throw new ServiceTrackerException("Active master entry must be a file, but it is a directory.");
}