You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mh...@apache.org on 2016/01/27 00:31:26 UTC
[3/4] incubator-asterixdb git commit: Asterix NCs Fault Tolerance
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
index 447e96d..f54db63 100644
--- a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
+++ b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestExecutor.java
@@ -24,12 +24,14 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
+import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -54,6 +56,7 @@ import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.StringRequestEntity;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
import org.json.JSONObject;
public class TestExecutor {
@@ -361,6 +364,14 @@ public class TestExecutor {
return getProcessOutput(p);
}
+ private static String executeVagrantScript(ProcessBuilder pb, String node, String scriptName) throws Exception {
+ pb.command("vagrant", "ssh", node, "--", pb.environment().get("SCRIPT_HOME") + scriptName);
+ Process p = pb.start();
+ p.waitFor();
+ InputStream input = p.getInputStream();
+ return IOUtils.toString(input, StandardCharsets.UTF_8.name());
+ }
+
private static String getScriptPath(String queryPath, String scriptBasePath, String scriptFileName) {
String targetWord = "queries" + File.separator;
int targetWordSize = targetWord.lastIndexOf(File.separator);
@@ -565,6 +576,22 @@ public class TestExecutor {
}
System.err.println("...but that was expected.");
break;
+ case "vagrant_script":
+ try {
+ String[] command = statement.trim().split(" ");
+ if (command.length != 2) {
+ throw new Exception("invalid vagrant script format");
+ }
+ String nodeId = command[0];
+ String scriptName = command[1];
+ String output = executeVagrantScript(pb, nodeId, scriptName);
+ if (output.contains("ERROR")) {
+ throw new Exception(output);
+ }
+ } catch (Exception e) {
+ throw new Exception("Test \"" + testFile + "\" FAILED!\n", e);
+ }
+ break;
default:
throw new IllegalArgumentException("No statements of type " + ctx.getType());
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-events/src/main/java/org/apache/asterix/event/util/PatternCreator.java
----------------------------------------------------------------------
diff --git a/asterix-events/src/main/java/org/apache/asterix/event/util/PatternCreator.java b/asterix-events/src/main/java/org/apache/asterix/event/util/PatternCreator.java
index 6085019..ce84cc8 100644
--- a/asterix-events/src/main/java/org/apache/asterix/event/util/PatternCreator.java
+++ b/asterix-events/src/main/java/org/apache/asterix/event/util/PatternCreator.java
@@ -315,10 +315,6 @@ public class PatternCreator {
patternList.addAll(createRemoveAsterixLogDirPattern(instance).getPattern());
patternList.addAll(createRemoveAsterixRootMetadata(instance).getPattern());
patternList.addAll(createRemoveAsterixTxnLogs(instance).getPattern());
- if (instance.getCluster().getDataReplication() != null
- && instance.getCluster().getDataReplication().isEnabled()) {
- patternList.addAll(createRemoveAsterixReplicationPattern(instance).getPattern());
- }
Patterns patterns = new Patterns(patternList);
return patterns;
}
@@ -647,29 +643,4 @@ public class PatternCreator {
Patterns patterns = new Patterns(patternList);
return patterns;
}
-
- private Patterns createRemoveAsterixReplicationPattern(AsterixInstance instance) throws Exception {
-
- List<Pattern> patternList = new ArrayList<Pattern>();
- Cluster cluster = instance.getCluster();
-
- Nodeid nodeid = null;
- String pargs = null;
- Event event = null;
- for (Node node : cluster.getNode()) {
- String[] nodeIODevices;
- String iodevices = node.getIodevices() == null ? cluster.getIodevices() : node.getIodevices();
- nodeIODevices = iodevices.trim().split(",");
- for (String nodeIODevice : nodeIODevices) {
- pargs = nodeIODevice + File.separator + cluster.getDataReplication().getReplicationStore();
- nodeid = new Nodeid(new Value(null, node.getId()));
- event = new Event("file_delete", nodeid, pargs);
- patternList.add(new Pattern(null, 1, null, event));
- }
- }
-
- Patterns patterns = new Patterns(patternList);
- return patterns;
- }
-
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/pom.xml
----------------------------------------------------------------------
diff --git a/asterix-installer/pom.xml b/asterix-installer/pom.xml
index 34c3e0a..b257e71 100644
--- a/asterix-installer/pom.xml
+++ b/asterix-installer/pom.xml
@@ -29,6 +29,7 @@
<failsafe.test.excludes>**/DmlRecoveryIT.java</failsafe.test.excludes>
<cluster.test.excludes>**/AsterixClusterLifeCycleIT.java</cluster.test.excludes>
<cluster.extest.excludes>**/ClusterExecutionIT.java</cluster.extest.excludes>
+ <replication.test.excludes>**/ReplicationIT.java</replication.test.excludes>
</properties>
<licenses>
@@ -123,6 +124,7 @@
<exclude>${failsafe.test.excludes}</exclude>
<exclude>${cluster.test.excludes}</exclude>
<exclude>${cluster.extest.excludes}</exclude>
+ <exclude>${replication.test.excludes}</exclude>
</excludes>
</configuration>
<executions>
@@ -194,6 +196,8 @@
<forkMode>pertest</forkMode>
<excludes>
<exclude>${failsafe.test.excludes}</exclude>
+ <exclude>${cluster.test.excludes}</exclude>
+ <exclude>${cluster.extest.excludes}</exclude>
</excludes>
</configuration>
<executions>
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/main/java/org/apache/asterix/installer/command/ValidateCommand.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/main/java/org/apache/asterix/installer/command/ValidateCommand.java b/asterix-installer/src/main/java/org/apache/asterix/installer/command/ValidateCommand.java
index 09c65c8..9559394 100644
--- a/asterix-installer/src/main/java/org/apache/asterix/installer/command/ValidateCommand.java
+++ b/asterix-installer/src/main/java/org/apache/asterix/installer/command/ValidateCommand.java
@@ -288,12 +288,6 @@ public class ValidateCommand extends AbstractCommand {
valid = false;
}
- if (cluster.getDataReplication().getReplicationStore() == null
- || cluster.getDataReplication().getReplicationStore().length() == 0) {
- valid = false;
- LOGGER.fatal("Replication store not defined. " + ERROR);
- }
-
if (cluster.getDataReplication().getReplicationPort() == null
|| cluster.getDataReplication().getReplicationPort().toString().length() == 0) {
valid = false;
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ReplicationIT.java
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/java/org/apache/asterix/installer/test/ReplicationIT.java b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ReplicationIT.java
new file mode 100644
index 0000000..86c15ae
--- /dev/null
+++ b/asterix-installer/src/test/java/org/apache/asterix/installer/test/ReplicationIT.java
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.installer.test;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.asterix.test.aql.TestExecutor;
+import org.apache.asterix.testframework.context.TestCaseContext;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.codehaus.plexus.util.FileUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public class ReplicationIT {
+
+ private static final String PATH_BASE = StringUtils
+ .join(new String[] { "src", "test", "resources", "integrationts", "replication" }, File.separator);
+ private static final String CLUSTER_BASE = StringUtils
+ .join(new String[] { "src", "test", "resources", "clusterts" }, File.separator);
+ private static final String PATH_ACTUAL = "repliationtest" + File.separator;
+ private static String managixFolderName;
+ private static final Logger LOGGER = Logger.getLogger(ReplicationIT.class.getName());
+ private static File asterixProjectDir = new File(System.getProperty("user.dir"));
+ private static final String CLUSTER_CC_ADDRESS = "10.10.0.2";
+ private static final int CLUSTER_CC_API_PORT = 19002;
+ private static ProcessBuilder pb;
+ private static Map<String, String> env;
+ private final static TestExecutor testExecutor = new TestExecutor(CLUSTER_CC_ADDRESS, CLUSTER_CC_API_PORT);
+ private static String SCRIPT_HOME;
+ protected TestCaseContext tcCtx;
+
+ public ReplicationIT(TestCaseContext tcCtx) {
+ this.tcCtx = tcCtx;
+ }
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ File outdir = new File(PATH_ACTUAL);
+ outdir.mkdirs();
+
+ // vagrant setup
+ File installerTargetDir = new File(asterixProjectDir, "target");
+ String[] installerFiles = installerTargetDir.list(new FilenameFilter() {
+ @Override
+ public boolean accept(File dir, String name) {
+ return new File(dir, name).isDirectory() && name.startsWith("asterix-installer")
+ && name.endsWith("binary-assembly");
+ }
+ });
+
+ if (installerFiles == null || installerFiles.length == 0) {
+ throw new Exception("Couldn't find installer binaries");
+ }
+
+ managixFolderName = installerFiles[0];
+
+ //copy tests data
+ FileUtils.copyDirectoryStructure(
+ new File(StringUtils.join(
+ new String[] { "..", "asterix-replication", "src", "test", "resources", "data" },
+ File.separator)),
+ new File(StringUtils.join(new String[] { "src", "test", "resources", "clusterts", "data" },
+ File.separator)));
+
+ //copy tests scripts
+ FileUtils.copyDirectoryStructure(
+ new File(StringUtils.join(
+ new String[] { "..", "asterix-replication", "src", "test", "resources", "scripts" },
+ File.separator)),
+ new File(StringUtils.join(new String[] { "src", "test", "resources", "clusterts", "scripts" },
+ File.separator)));
+
+ invoke("cp", "-r", installerTargetDir.toString() + "/" + managixFolderName,
+ asterixProjectDir + "/" + CLUSTER_BASE);
+
+ remoteInvoke("cp -r /vagrant/" + managixFolderName + " /tmp/asterix");
+
+ SCRIPT_HOME = "/vagrant/scripts/";
+ pb = new ProcessBuilder();
+ env = pb.environment();
+ env.put("SCRIPT_HOME", SCRIPT_HOME);
+ File cwd = new File(asterixProjectDir.toString() + "/" + CLUSTER_BASE);
+ pb.directory(cwd);
+ pb.redirectErrorStream(true);
+
+ //make scripts executable
+ String chmodScriptsCmd = "chmod -R +x " + SCRIPT_HOME;
+ remoteInvoke(chmodScriptsCmd, "cc");
+ remoteInvoke(chmodScriptsCmd, "nc1");
+ remoteInvoke(chmodScriptsCmd, "nc2");
+
+ //managix configure
+ logOutput(managixInvoke("configure").getInputStream());
+
+ //managix validate
+ String validateOutput = IOUtils.toString(managixInvoke("validate").getInputStream(),
+ StandardCharsets.UTF_8.name());
+ if (validateOutput.contains("ERROR")) {
+ throw new Exception("Managix validate error: " + validateOutput);
+ }
+ }
+
+ @AfterClass
+ public static void tearDown() throws Exception {
+ //remove files
+ remoteInvoke("rm -rf /vagrant/asterix");
+ }
+
+ @Before
+ public void beforeTest() throws Exception {
+ //create instance
+ managixInvoke("create -n vagrant-ssh -c /vagrant/cluster_with_replication.xml").getInputStream();
+ }
+
+ @After
+ public void afterTest() throws Exception {
+ //stop instance
+ managixInvoke("stop -n vagrant-ssh");
+
+ //verify that all processes have been stopped
+ String killProcesses = "kill_cc_and_nc.sh";
+ executeVagrantScript("cc", killProcesses);
+ executeVagrantScript("nc1", killProcesses);
+ executeVagrantScript("nc2", killProcesses);
+
+ //delete storage
+ String deleteStorage = "delete_storage.sh";
+ executeVagrantScript("cc", deleteStorage);
+ executeVagrantScript("nc1", deleteStorage);
+ executeVagrantScript("nc2", deleteStorage);
+
+ //delete instance
+ managixInvoke("delete -n vagrant-ssh");
+ }
+
+ @Test
+ public void test() throws Exception {
+ testExecutor.executeTest(PATH_ACTUAL, tcCtx, pb, false);
+ }
+
+ @Parameters(name = "ReplicationIT {index}: {0}")
+ public static Collection<Object[]> tests() throws Exception {
+ Collection<Object[]> testArgs = buildTestsInXml(TestCaseContext.DEFAULT_TESTSUITE_XML_NAME);
+ if (testArgs.size() == 0) {
+ testArgs = buildTestsInXml(TestCaseContext.DEFAULT_TESTSUITE_XML_NAME);
+ }
+ return testArgs;
+ }
+
+ protected static Collection<Object[]> buildTestsInXml(String xmlfile) throws Exception {
+ Collection<Object[]> testArgs = new ArrayList<Object[]>();
+ TestCaseContext.Builder b = new TestCaseContext.Builder();
+ for (TestCaseContext ctx : b.build(new File(PATH_BASE), xmlfile)) {
+ testArgs.add(new Object[] { ctx });
+ }
+ return testArgs;
+ }
+
+ public static boolean checkOutput(InputStream input, String requiredSubString) {
+ String candidate;
+ try {
+ candidate = IOUtils.toString(input, StandardCharsets.UTF_8.name());
+ } catch (IOException e) {
+ LOGGER.warning("Could not check output of subprocess");
+ return false;
+ }
+ return candidate.contains(requiredSubString);
+ }
+
+ public static boolean checkOutput(String candidate, String requiredSubString) {
+ return candidate.contains(requiredSubString);
+ }
+
+ public static String processOut(Process p) throws IOException {
+ InputStream input = p.getInputStream();
+ return IOUtils.toString(input, StandardCharsets.UTF_8.name());
+ }
+
+ public static void logOutput(InputStream input) {
+ try {
+ LOGGER.info(IOUtils.toString(input, StandardCharsets.UTF_8.name()));
+ } catch (IOException e) {
+ LOGGER.warning("Could not print output of subprocess");
+ }
+ }
+
+ private static Process invoke(String... args) throws Exception {
+ ProcessBuilder pb = new ProcessBuilder(args);
+ pb.redirectErrorStream(true);
+ Process p = pb.start();
+ return p;
+ }
+
+ private static Process remoteInvoke(String cmd) throws Exception {
+ ProcessBuilder pb = new ProcessBuilder("vagrant", "ssh", "cc", "-c", "MANAGIX_HOME=/tmp/asterix/ " + cmd);
+ File cwd = new File(asterixProjectDir.toString() + "/" + CLUSTER_BASE);
+ pb.directory(cwd);
+ pb.redirectErrorStream(true);
+ Process p = pb.start();
+ p.waitFor();
+ return p;
+ }
+
+ private static Process remoteInvoke(String cmd, String node) throws Exception {
+ ProcessBuilder pb = new ProcessBuilder("vagrant", "ssh", node, "--", cmd);
+ File cwd = new File(asterixProjectDir.toString() + "/" + CLUSTER_BASE);
+ pb.directory(cwd);
+ pb.redirectErrorStream(true);
+ Process p = pb.start();
+ p.waitFor();
+ return p;
+ }
+
+ private static Process managixInvoke(String cmd) throws Exception {
+ return remoteInvoke("/tmp/asterix/bin/managix " + cmd);
+ }
+
+ private static String executeVagrantScript(String node, String scriptName) throws Exception {
+ pb.command("vagrant", "ssh", node, "--", SCRIPT_HOME + scriptName);
+ Process p = pb.start();
+ p.waitFor();
+ InputStream input = p.getInputStream();
+ return IOUtils.toString(input, StandardCharsets.UTF_8.name());
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/clusterts/cluster_with_replication.xml
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/clusterts/cluster_with_replication.xml b/asterix-installer/src/test/resources/clusterts/cluster_with_replication.xml
new file mode 100644
index 0000000..b9f4658
--- /dev/null
+++ b/asterix-installer/src/test/resources/clusterts/cluster_with_replication.xml
@@ -0,0 +1,63 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<cluster xmlns="cluster">
+
+ <name>vagrant</name>
+
+ <username>vagrant</username>
+
+ <working_dir>
+ <dir>/vagrant/asterix/managix-working</dir>
+ <NFS>true</NFS>
+ </working_dir>
+
+ <log_dir>/home/vagrant/asterix/logs/</log_dir>
+ <txn_log_dir>/home/vagrant/asterix/tx_logs</txn_log_dir>
+ <iodevices>/home/vagrant/asterix/p1,/home/vagrant/asterix/p2</iodevices>
+
+ <store>storage</store>
+
+ <java_home>/usr/java/latest</java_home>
+ <metadata_node>nc1</metadata_node>
+
+ <data_replication>
+ <enabled>true</enabled>
+ <replication_port>2000</replication_port>
+ <replication_factor>2</replication_factor>
+ <auto_failover>true</auto_failover>
+ <replication_time_out>10</replication_time_out>
+ </data_replication>
+
+ <master_node>
+ <id>cc</id>
+ <client_ip>10.10.0.2</client_ip>
+ <cluster_ip>10.10.0.2</cluster_ip>
+ <client_port>1098</client_port>
+ <cluster_port>1099</cluster_port>
+ <http_port>8888</http_port>
+ </master_node>
+ <node>
+ <id>nc1</id>
+ <cluster_ip>10.10.0.3</cluster_ip>
+ </node>
+ <node>
+ <id>nc2</id>
+ <cluster_ip>10.10.0.4</cluster_ip>
+ </node>
+</cluster>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/clusterts/known_hosts
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/clusterts/known_hosts b/asterix-installer/src/test/resources/clusterts/known_hosts
index 5ab452a..273f9f3 100644
--- a/asterix-installer/src/test/resources/clusterts/known_hosts
+++ b/asterix-installer/src/test/resources/clusterts/known_hosts
@@ -1,6 +1,6 @@
-nc1,10.10.0.3 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA4DTPG2/P073Ak5htIlGYxFh9BYo3bwxBW/SZ1+MDtBUGSEFG0wP4dDRNxwzdmc2EB5JbiAb4t2wLtlFJUbhZUuhxxv2WP4Uastt+U2CWe8+/OsSCAieDv9+dvhT2YxkyAwSCFjyv9T+ftE7YyuIqgBoDTcsvCGzhcl80xd/mQuboneYdR8A0Q2cbd47BQ1D+76HW3l0t51N4fsvbds/LdkXtqVqCadiTKuZ/Ki9JkWlIwffF1IvtARdkW5hyA2fkdJcNBGcfIlxcEvXUvEcIi2FVdY+rJTR9014SrspPtfTyPe4mMgwNQGGT1dm9tnjhq/N6WrYOQ8J3HUn+r7tO5w==
-nc2,10.10.0.4 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA4DTPG2/P073Ak5htIlGYxFh9BYo3bwxBW/SZ1+MDtBUGSEFG0wP4dDRNxwzdmc2EB5JbiAb4t2wLtlFJUbhZUuhxxv2WP4Uastt+U2CWe8+/OsSCAieDv9+dvhT2YxkyAwSCFjyv9T+ftE7YyuIqgBoDTcsvCGzhcl80xd/mQuboneYdR8A0Q2cbd47BQ1D+76HW3l0t51N4fsvbds/LdkXtqVqCadiTKuZ/Ki9JkWlIwffF1IvtARdkW5hyA2fkdJcNBGcfIlxcEvXUvEcIi2FVdY+rJTR9014SrspPtfTyPe4mMgwNQGGT1dm9tnjhq/N6WrYOQ8J3HUn+r7tO5w==
-nc3,10.10.0.5 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA4DTPG2/P073Ak5htIlGYxFh9BYo3bwxBW/SZ1+MDtBUGSEFG0wP4dDRNxwzdmc2EB5JbiAb4t2wLtlFJUbhZUuhxxv2WP4Uastt+U2CWe8+/OsSCAieDv9+dvhT2YxkyAwSCFjyv9T+ftE7YyuIqgBoDTcsvCGzhcl80xd/mQuboneYdR8A0Q2cbd47BQ1D+76HW3l0t51N4fsvbds/LdkXtqVqCadiTKuZ/Ki9JkWlIwffF1IvtARdkW5hyA2fkdJcNBGcfIlxcEvXUvEcIi2FVdY+rJTR9014SrspPtfTyPe4mMgwNQGGT1dm9tnjhq/N6WrYOQ8J3HUn+r7tO5w==
-127.0.0.1 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA4DTPG2/P073Ak5htIlGYxFh9BYo3bwxBW/SZ1+MDtBUGSEFG0wP4dDRNxwzdmc2EB5JbiAb4t2wLtlFJUbhZUuhxxv2WP4Uastt+U2CWe8+/OsSCAieDv9+dvhT2YxkyAwSCFjyv9T+ftE7YyuIqgBoDTcsvCGzhcl80xd/mQuboneYdR8A0Q2cbd47BQ1D+76HW3l0t51N4fsvbds/LdkXtqVqCadiTKuZ/Ki9JkWlIwffF1IvtARdkW5hyA2fkdJcNBGcfIlxcEvXUvEcIi2FVdY+rJTR9014SrspPtfTyPe4mMgwNQGGT1dm9tnjhq/N6WrYOQ8J3HUn+r7tO5w==
::1 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA4DTPG2/P073Ak5htIlGYxFh9BYo3bwxBW/SZ1+MDtBUGSEFG0wP4dDRNxwzdmc2EB5JbiAb4t2wLtlFJUbhZUuhxxv2WP4Uastt+U2CWe8+/OsSCAieDv9+dvhT2YxkyAwSCFjyv9T+ftE7YyuIqgBoDTcsvCGzhcl80xd/mQuboneYdR8A0Q2cbd47BQ1D+76HW3l0t51N4fsvbds/LdkXtqVqCadiTKuZ/Ki9JkWlIwffF1IvtARdkW5hyA2fkdJcNBGcfIlxcEvXUvEcIi2FVdY+rJTR9014SrspPtfTyPe4mMgwNQGGT1dm9tnjhq/N6WrYOQ8J3HUn+r7tO5w==
-10.10.0.2 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA4DTPG2/P073Ak5htIlGYxFh9BYo3bwxBW/SZ1+MDtBUGSEFG0wP4dDRNxwzdmc2EB5JbiAb4t2wLtlFJUbhZUuhxxv2WP4Uastt+U2CWe8+/OsSCAieDv9+dvhT2YxkyAwSCFjyv9T+ftE7YyuIqgBoDTcsvCGzhcl80xd/mQuboneYdR8A0Q2cbd47BQ1D+76HW3l0t51N4fsvbds/LdkXtqVqCadiTKuZ/Ki9JkWlIwffF1IvtARdkW5hyA2fkdJcNBGcfIlxcEvXUvEcIi2FVdY+rJTR9014SrspPtfTyPe4mMgwNQGGT1dm9tnjhq/N6WrYOQ8J3HUn+r7tO5w==
+127.0.0.1 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA9gfVcSU968DdflnfJkup3em0MFX13uUTxsiaPisyahjcc++d+9yo+OL5Avffznal9Ev9hzptYclUw9Rx1fSs09g6w8qRiaRLHhYw3mgAer6vG9NZxaj2i2TAVUjdPKZDOSgtFzvPZiwWfhdqi6CmzMRi3MO5G3LzIRzB+qjMitgJ4p/R0mLTik40KTBQNxgN5EmegoANjyBDbdvBKf4vQvMwYGvByq4rMDyDCC8JgcR+B1kR8xNKwDYZFokuSrmAw6PXUmjyW6FaWaUFvrqaj64l1yHqLPcZDTbcSzum6+RaR5Gg2upfs3ahICOzzlnwnwtZxznWKU+rNd4T29cHOw==
+nc1,10.10.0.3 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA9gfVcSU968DdflnfJkup3em0MFX13uUTxsiaPisyahjcc++d+9yo+OL5Avffznal9Ev9hzptYclUw9Rx1fSs09g6w8qRiaRLHhYw3mgAer6vG9NZxaj2i2TAVUjdPKZDOSgtFzvPZiwWfhdqi6CmzMRi3MO5G3LzIRzB+qjMitgJ4p/R0mLTik40KTBQNxgN5EmegoANjyBDbdvBKf4vQvMwYGvByq4rMDyDCC8JgcR+B1kR8xNKwDYZFokuSrmAw6PXUmjyW6FaWaUFvrqaj64l1yHqLPcZDTbcSzum6+RaR5Gg2upfs3ahICOzzlnwnwtZxznWKU+rNd4T29cHOw==
+nc2,10.10.0.4 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA9gfVcSU968DdflnfJkup3em0MFX13uUTxsiaPisyahjcc++d+9yo+OL5Avffznal9Ev9hzptYclUw9Rx1fSs09g6w8qRiaRLHhYw3mgAer6vG9NZxaj2i2TAVUjdPKZDOSgtFzvPZiwWfhdqi6CmzMRi3MO5G3LzIRzB+qjMitgJ4p/R0mLTik40KTBQNxgN5EmegoANjyBDbdvBKf4vQvMwYGvByq4rMDyDCC8JgcR+B1kR8xNKwDYZFokuSrmAw6PXUmjyW6FaWaUFvrqaj64l1yHqLPcZDTbcSzum6+RaR5Gg2upfs3ahICOzzlnwnwtZxznWKU+rNd4T29cHOw==10.10.0.2 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA9gfVcSU968DdflnfJkup3em0MFX13uUTxsiaPisyahjcc++d+9yo+OL5Avffznal9Ev9hzptYclUw9Rx1fSs09g6w8qRiaRLHhYw3mgAer6vG9NZxaj2i2TAVUjdPKZDOSgtFzvPZiwWfhdqi6CmzMRi3MO5G3LzIRzB+qjMitgJ4p/R0mLTik40KTBQNxgN5EmegoANjyBDbdvBKf4vQvMwYGvByq4rMDyDCC8JgcR+B1kR8xNKwDYZFokuSrmAw6PXUmjyW6FaWaUFvrqaj64l1yHqLPcZDTbcSzum6+RaR5Gg2upfs3ahICOzzlnwnwtZxznWKU+rNd4T29cHOw==
+10.10.0.4 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA9gfVcSU968DdflnfJkup3em0MFX13uUTxsiaPisyahjcc++d+9yo+OL5Avffznal9Ev9hzptYclUw9Rx1fSs09g6w8qRiaRLHhYw3mgAer6vG9NZxaj2i2TAVUjdPKZDOSgtFzvPZiwWfhdqi6CmzMRi3MO5G3LzIRzB+qjMitgJ4p/R0mLTik40KTBQNxgN5EmegoANjyBDbdvBKf4vQvMwYGvByq4rMDyDCC8JgcR+B1kR8xNKwDYZFokuSrmAw6PXUmjyW6FaWaUFvrqaj64l1yHqLPcZDTbcSzum6+RaR5Gg2upfs3ahICOzzlnwnwtZxznWKU+rNd4T29cHOw==
+10.10.0.2 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA9gfVcSU968DdflnfJkup3em0MFX13uUTxsiaPisyahjcc++d+9yo+OL5Avffznal9Ev9hzptYclUw9Rx1fSs09g6w8qRiaRLHhYw3mgAer6vG9NZxaj2i2TAVUjdPKZDOSgtFzvPZiwWfhdqi6CmzMRi3MO5G3LzIRzB+qjMitgJ4p/R0mLTik40KTBQNxgN5EmegoANjyBDbdvBKf4vQvMwYGvByq4rMDyDCC8JgcR+B1kR8xNKwDYZFokuSrmAw6PXUmjyW6FaWaUFvrqaj64l1yHqLPcZDTbcSzum6+RaR5Gg2upfs3ahICOzzlnwnwtZxznWKU+rNd4T29cHOw==
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.1.ddl.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.1.ddl.aql
new file mode 100644
index 0000000..6a2441e
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.1.ddl.aql
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : bulkload.aql
+ * Description : Check that Bulkload LSM component are replicated correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, query data, kill one node
+ and wait until the failover complete, query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+drop dataverse TinySocial if exists;
+create dataverse TinySocial;
+use dataverse TinySocial;
+
+create type EmploymentType as open {
+ organization-name: string,
+ start-date: date,
+ end-date: date?
+}
+
+create type FacebookUserType as closed {
+ id: int,
+ alias: string,
+ name: string,
+ user-since: datetime,
+ friend-ids: {{ int32 }},
+ employment: [EmploymentType]
+}
+
+/********* 2. Create Datasets ***********/
+use dataverse TinySocial;
+
+drop dataset FacebookUsers if exists;
+
+create dataset FacebookUsers(FacebookUserType)
+primary key id;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.2.update.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.2.update.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.2.update.aql
new file mode 100644
index 0000000..ae14ad0
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.2.update.aql
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : bulkload.aql
+ * Description : Check that Bulkload LSM component are replicated correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, query data, kill one node
+ and wait until the failover complete, query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+use dataverse TinySocial;
+
+load dataset FacebookUsers using localfs
+(("path"="vagrant-ssh_nc1:///vagrant/data/fbu.adm"),("format"="adm"));
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.3.txnqbc.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.3.txnqbc.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.3.txnqbc.aql
new file mode 100644
index 0000000..9c8cb96
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.3.txnqbc.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : bulkload.aql
+ * Description : Check that Bulkload LSM component are replicated correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, query data, kill one node
+ and wait until the failover complete, query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+use dataverse TinySocial;
+
+count (for $x in dataset FacebookUsers return $x);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.4.vagrant_script.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.4.vagrant_script.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.4.vagrant_script.aql
new file mode 100644
index 0000000..5695ed7
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.4.vagrant_script.aql
@@ -0,0 +1 @@
+nc2 kill_cc_and_nc.sh
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.5.sleep.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.5.sleep.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.5.sleep.aql
new file mode 100644
index 0000000..51b0e76
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.5.sleep.aql
@@ -0,0 +1 @@
+60000
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.6.txnqar.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.6.txnqar.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.6.txnqar.aql
new file mode 100644
index 0000000..9c8cb96
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/bulkload/bulkload.6.txnqar.aql
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : bulkload.aql
+ * Description : Check that Bulkload LSM component are replicated correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, query data, kill one node
+ and wait until the failover complete, query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+use dataverse TinySocial;
+
+count (for $x in dataset FacebookUsers return $x);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.1.ddl.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.1.ddl.aql
new file mode 100644
index 0000000..8de2067
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.1.ddl.aql
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : mem_component_recovery.aql
+ * Description : Check that Memory LSM component are replicated and recovered correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, copy it to in-memory dataset, query
+ data from memory, kill one node and wait until the failover complete,
+ query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+drop dataverse TinySocial if exists;
+create dataverse TinySocial;
+use dataverse TinySocial;
+
+create type EmploymentType as open {
+ organization-name: string,
+ start-date: date,
+ end-date: date?
+}
+
+create type FacebookUserType as closed {
+ id: int,
+ alias: string,
+ name: string,
+ user-since: datetime,
+ friend-ids: {{ int32 }},
+ employment: [EmploymentType]
+}
+
+/********* 2. Create Datasets ***********/
+use dataverse TinySocial;
+
+drop dataset FacebookUsers if exists;
+
+create dataset FacebookUsers(FacebookUserType)
+primary key id;
+
+create dataset FacebookUsersInMemory(FacebookUserType)
+primary key id;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.2.update.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.2.update.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.2.update.aql
new file mode 100644
index 0000000..8087689
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.2.update.aql
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : mem_component_recovery.aql
+ * Description : Check that Memory LSM component are replicated and recovered correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, copy it to in-memory dataset, query
+ data from memory, kill one node and wait until the failover complete,
+ query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+use dataverse TinySocial;
+
+load dataset FacebookUsers using localfs
+(("path"="vagrant-ssh_nc1:///vagrant/data/fbu.adm"),("format"="adm"));
+
+insert into dataset TinySocial.FacebookUsersInMemory(for $x in dataset TinySocial.FacebookUsers return $x);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.3.txnqbc.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.3.txnqbc.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.3.txnqbc.aql
new file mode 100644
index 0000000..e25e409
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.3.txnqbc.aql
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : mem_component_recovery.aql
+ * Description : Check that Memory LSM component are replicated and recovered correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, copy it to in-memory dataset, query
+ data from memory, kill one node and wait until the failover complete,
+ query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+use dataverse TinySocial;
+
+count (for $x in dataset FacebookUsersInMemory return $x);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.4.vagrant_script.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.4.vagrant_script.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.4.vagrant_script.aql
new file mode 100644
index 0000000..5695ed7
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.4.vagrant_script.aql
@@ -0,0 +1 @@
+nc2 kill_cc_and_nc.sh
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.5.sleep.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.5.sleep.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.5.sleep.aql
new file mode 100644
index 0000000..51b0e76
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.5.sleep.aql
@@ -0,0 +1 @@
+60000
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.6.txnqar.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.6.txnqar.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.6.txnqar.aql
new file mode 100644
index 0000000..e25e409
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/mem_component_recovery/mem_component_recovery.6.txnqar.aql
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : mem_component_recovery.aql
+ * Description : Check that Memory LSM component are replicated and recovered correclty.
+ The test goes as follows:
+ start 2 nodes, bulkload a dataset, copy it to in-memory dataset, query
+ data from memory, kill one node and wait until the failover complete,
+ query the data again.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+use dataverse TinySocial;
+
+count (for $x in dataset FacebookUsersInMemory return $x);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.1.ddl.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.1.ddl.aql
new file mode 100644
index 0000000..113d144
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.1.ddl.aql
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : metadata_node_recovery.aql
+ * Description : Check that metadata node failover is done correctly.
+ The test goes as follows:
+ start 2 nodes, create a dataset, kill metadata node
+ and wait until the failover complete, verify the
+ dataset still exists.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+drop dataverse TinySocial if exists;
+create dataverse TinySocial;
+use dataverse TinySocial;
+
+create type EmploymentType as open {
+ organization-name: string,
+ start-date: date,
+ end-date: date?
+}
+
+create type FacebookUserType as closed {
+ id: int,
+ alias: string,
+ name: string,
+ user-since: datetime,
+ friend-ids: {{ int32 }},
+ employment: [EmploymentType]
+}
+
+/********* 2. Create Datasets ***********/
+use dataverse TinySocial;
+
+drop dataset FacebookUsers if exists;
+
+create dataset FacebookUsers(FacebookUserType)
+primary key id;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.2.txnqbc.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.2.txnqbc.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.2.txnqbc.aql
new file mode 100644
index 0000000..76bdcfe
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.2.txnqbc.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : metadata_node_recovery.aql
+ * Description : Check that metadata node failover is done correctly.
+ The test goes as follows:
+ start 2 nodes, create a dataset, kill metadata node
+ and wait until the failover complete, verify the
+ dataset still exists.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+for $x in dataset Metadata.Dataset where $x.DatasetName ='FacebookUsers' return $x.DatasetName;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.3.vagrant_script.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.3.vagrant_script.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.3.vagrant_script.aql
new file mode 100644
index 0000000..5eec164
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.3.vagrant_script.aql
@@ -0,0 +1 @@
+nc1 kill_cc_and_nc.sh
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.4.sleep.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.4.sleep.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.4.sleep.aql
new file mode 100644
index 0000000..51b0e76
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.4.sleep.aql
@@ -0,0 +1 @@
+60000
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.5.txnqar.aql
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.5.txnqar.aql b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.5.txnqar.aql
new file mode 100644
index 0000000..ac1c593
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/queries/failover/metadata_node/metadata_node.5.txnqar.aql
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Test case Name : metadata_node_recovery.aql
+ * Description : Check that metadata node failover is done correctly.
+ The test goes as follows:
+ start 2 nodes, create a dataset, kill metadata node
+ and wait until the failover complete, verify the
+ dataset still exists.
+ * Expected Result : Success
+ * Date : January 6 2016
+ */
+
+use dataverse TinySocial;
+
+for $x in dataset Metadata.Dataset where $x.DatasetName ='FacebookUsers' return $x.DatasetName;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-installer/src/test/resources/integrationts/replication/testsuite.xml
----------------------------------------------------------------------
diff --git a/asterix-installer/src/test/resources/integrationts/replication/testsuite.xml b/asterix-installer/src/test/resources/integrationts/replication/testsuite.xml
new file mode 100644
index 0000000..f033086
--- /dev/null
+++ b/asterix-installer/src/test/resources/integrationts/replication/testsuite.xml
@@ -0,0 +1,37 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<test-suite xmlns="urn:xml.testframework.asterix.apache.org" ResultOffsetPath="results" QueryOffsetPath="queries" QueryFileExtension=".aql">
+ <test-group name="failover">
+ <test-case FilePath="failover">
+ <compilation-unit name="bulkload">
+ <output-dir compare="Text">bulkload</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="failover">
+ <compilation-unit name="mem_component_recovery">
+ <output-dir compare="Text">mem_component_recovery</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="failover">
+ <compilation-unit name="metadata_node">
+ <output-dir compare="Text">metadata_node</output-dir>
+ </compilation-unit>
+ </test-case>
+ </test-group>
+</test-suite>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
index aa7f7d5..823e861 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
@@ -383,6 +383,12 @@ public class MetadataBootstrap {
dataLifecycleManager.register(absolutePath, lsmBtree);
} else {
final LocalResource resource = localResourceRepository.getResourceByPath(absolutePath);
+ if (resource == null) {
+ throw new Exception("Could not find required metadata indexes. Please delete "
+ + propertiesProvider.getMetadataProperties().getTransactionLogDirs()
+ .get(runtimeContext.getTransactionSubsystem().getId())
+ + " to intialize as a new instance. (WARNING: all data will be lost.)");
+ }
resourceID = resource.getResourceId();
lsmBtree = (LSMBTree) dataLifecycleManager.getIndex(absolutePath);
if (lsmBtree == null) {
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
index b6f3c9e..601ce15 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
@@ -142,4 +142,10 @@ public class SplitsAndConstraintsUtil {
FileSplit[] splits = splitsForFilesIndex(mdTxnCtx, dataverseName, datasetName, targetIdxName, create);
return StoragePathUtil.splitProviderAndPartitionConstraints(splits);
}
+
+ public static String getIndexPath(String partitionPath, int partition, String dataverse, String fullIndexName) {
+ String storageDirName = AsterixClusterProperties.INSTANCE.getStorageDirectoryName();
+ return partitionPath + StoragePathUtil.prepareStoragePartitionPath(storageDirName, partition) + File.separator
+ + StoragePathUtil.prepareDataverseIndexName(dataverse, fullIndexName);
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixAppContextInfo.java
----------------------------------------------------------------------
diff --git a/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixAppContextInfo.java b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixAppContextInfo.java
index 866162b..082618b 100644
--- a/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixAppContextInfo.java
+++ b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixAppContextInfo.java
@@ -20,10 +20,11 @@ package org.apache.asterix.om.util;
import java.util.logging.Logger;
+import org.apache.asterix.common.cluster.IGlobalRecoveryMaanger;
+import org.apache.asterix.common.config.AsterixBuildProperties;
import org.apache.asterix.common.config.AsterixCompilerProperties;
import org.apache.asterix.common.config.AsterixExternalProperties;
import org.apache.asterix.common.config.AsterixFeedProperties;
-import org.apache.asterix.common.config.AsterixBuildProperties;
import org.apache.asterix.common.config.AsterixMetadataProperties;
import org.apache.asterix.common.config.AsterixPropertiesAccessor;
import org.apache.asterix.common.config.AsterixReplicationProperties;
@@ -56,12 +57,13 @@ public class AsterixAppContextInfo implements IAsterixApplicationContextInfo, IA
private AsterixFeedProperties feedProperties;
private AsterixBuildProperties buildProperties;
private AsterixReplicationProperties replicationProperties;
-
+ private final IGlobalRecoveryMaanger globalRecoveryMaanger;
private IHyracksClientConnection hcc;
- public static void initialize(ICCApplicationContext ccAppCtx, IHyracksClientConnection hcc) throws AsterixException {
+ public static void initialize(ICCApplicationContext ccAppCtx, IHyracksClientConnection hcc,
+ IGlobalRecoveryMaanger globalRecoveryMaanger) throws AsterixException {
if (INSTANCE == null) {
- INSTANCE = new AsterixAppContextInfo(ccAppCtx, hcc);
+ INSTANCE = new AsterixAppContextInfo(ccAppCtx, hcc, globalRecoveryMaanger);
}
AsterixPropertiesAccessor propertiesAccessor = new AsterixPropertiesAccessor();
INSTANCE.compilerProperties = new AsterixCompilerProperties(propertiesAccessor);
@@ -77,9 +79,11 @@ public class AsterixAppContextInfo implements IAsterixApplicationContextInfo, IA
Logger.getLogger("org.apache").setLevel(INSTANCE.externalProperties.getLogLevel());
}
- private AsterixAppContextInfo(ICCApplicationContext ccAppCtx, IHyracksClientConnection hcc) {
+ private AsterixAppContextInfo(ICCApplicationContext ccAppCtx, IHyracksClientConnection hcc,
+ IGlobalRecoveryMaanger globalRecoveryMaanger) {
this.appCtx = ccAppCtx;
this.hcc = hcc;
+ this.globalRecoveryMaanger = globalRecoveryMaanger;
}
public static AsterixAppContextInfo getInstance() {
@@ -144,4 +148,9 @@ public class AsterixAppContextInfo implements IAsterixApplicationContextInfo, IA
public AsterixReplicationProperties getReplicationProperties() {
return replicationProperties;
}
+
+ @Override
+ public IGlobalRecoveryMaanger getGlobalRecoveryManager() {
+ return globalRecoveryMaanger;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/8fc8bf8b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixClusterProperties.java
----------------------------------------------------------------------
diff --git a/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixClusterProperties.java b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixClusterProperties.java
index 80008c5..c2c28df 100644
--- a/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixClusterProperties.java
+++ b/asterix-om/src/main/java/org/apache/asterix/om/util/AsterixClusterProperties.java
@@ -35,6 +35,12 @@ import javax.xml.bind.Unmarshaller;
import org.apache.asterix.common.api.IClusterManagementWork.ClusterState;
import org.apache.asterix.common.cluster.ClusterPartition;
+import org.apache.asterix.common.config.AsterixReplicationProperties;
+import org.apache.asterix.common.messaging.TakeoverMetadataNodeRequestMessage;
+import org.apache.asterix.common.messaging.TakeoverMetadataNodeResponseMessage;
+import org.apache.asterix.common.messaging.TakeoverPartitionsRequestMessage;
+import org.apache.asterix.common.messaging.TakeoverPartitionsResponseMessage;
+import org.apache.asterix.common.messaging.api.ICCMessageBroker;
import org.apache.asterix.event.schema.cluster.Cluster;
import org.apache.asterix.event.schema.cluster.Node;
import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
@@ -45,6 +51,11 @@ import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConst
*/
public class AsterixClusterProperties {
+ /**
+ * TODO: currently after instance restarts we require all nodes to join again, otherwise the cluster wont be ACTIVE.
+ * we may overcome this by storing the cluster state before the instance shutdown and using it on startup to identify
+ * the nodes that are expected the join.
+ */
private static final Logger LOGGER = Logger.getLogger(AsterixClusterProperties.class.getName());
@@ -63,6 +74,12 @@ public class AsterixClusterProperties {
private Map<String, ClusterPartition[]> node2PartitionsMap = null;
private SortedMap<Integer, ClusterPartition> clusterPartitions = null;
+ private Map<Long, TakeoverPartitionsRequestMessage> pendingTakeoverRequests = null;
+
+ private long takeoverRequestId = 0;
+ private String currentMetadataNode = null;
+ private boolean isMetadataNodeActive = false;
+ private boolean autoFailover = false;
private AsterixClusterProperties() {
InputStream is = this.getClass().getClassLoader().getResourceAsStream(CLUSTER_CONFIGURATION_FILE);
@@ -82,6 +99,13 @@ public class AsterixClusterProperties {
if (AsterixAppContextInfo.getInstance().getCCApplicationContext() != null) {
node2PartitionsMap = AsterixAppContextInfo.getInstance().getMetadataProperties().getNodePartitions();
clusterPartitions = AsterixAppContextInfo.getInstance().getMetadataProperties().getClusterPartitions();
+ currentMetadataNode = AsterixAppContextInfo.getInstance().getMetadataProperties().getMetadataNodeName();
+ if (isAutoFailoverEnabled()) {
+ autoFailover = cluster.getDataReplication().isAutoFailover();
+ }
+ if (autoFailover) {
+ pendingTakeoverRequests = new HashMap<>();
+ }
}
}
}
@@ -91,18 +115,24 @@ public class AsterixClusterProperties {
public synchronized void removeNCConfiguration(String nodeId) {
updateNodePartitions(nodeId, false);
ncConfiguration.remove(nodeId);
+ if (nodeId.equals(currentMetadataNode)) {
+ isMetadataNodeActive = false;
+ LOGGER.info("Metadata node is now inactive");
+ }
if (LOGGER.isLoggable(Level.INFO)) {
- LOGGER.info(" Removing configuration parameters for node id " + nodeId);
+ LOGGER.info("Removing configuration parameters for node id " + nodeId);
+ }
+ if (autoFailover) {
+ requestPartitionsTakeover(nodeId);
}
- // TODO implement fault tolerance as follows:
- // 1. collect the partitions of the failed NC
- // 2. For each partition, request a remote replica to take over.
- // 3. wait until each remote replica completes the recovery for the lost partitions
- // 4. update the cluster state
}
public synchronized void addNCConfiguration(String nodeId, Map<String, String> configuration) {
ncConfiguration.put(nodeId, configuration);
+ if (nodeId.equals(currentMetadataNode)) {
+ isMetadataNodeActive = true;
+ LOGGER.info("Metadata node is now active");
+ }
updateNodePartitions(nodeId, true);
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(" Registering configuration parameters for node id " + nodeId);
@@ -118,8 +148,6 @@ public class AsterixClusterProperties {
p.setActive(added);
if (added) {
p.setActiveNodeId(nodeId);
- } else {
- p.setActiveNodeId(null);
}
}
resetClusterPartitionConstraint();
@@ -135,13 +163,20 @@ public class AsterixClusterProperties {
return;
}
}
- // if all storage partitions are active, then the cluster is active
- state = ClusterState.ACTIVE;
- LOGGER.info("Cluster is now ACTIVE");
+ //if all storage partitions are active as well as the metadata node, then the cluster is active
+ if (isMetadataNodeActive) {
+ state = ClusterState.ACTIVE;
+ LOGGER.info("Cluster is now ACTIVE");
+ //start global recovery
+ AsterixAppContextInfo.getInstance().getGlobalRecoveryManager().startGlobalRecovery();
+ } else {
+ requestMetadataNodeTakeover();
+ }
}
/**
* Returns the number of IO devices configured for a Node Controller
+ *
* @param nodeId
* unique identifier of the Node Controller
* @return number of IO devices. -1 if the node id is not valid. A node id
@@ -155,6 +190,7 @@ public class AsterixClusterProperties {
/**
* Returns the IO devices configured for a Node Controller
+ *
* @param nodeId
* unique identifier of the Node Controller
* @return a list of IO devices. null if node id is not valid. A node id is not valid
@@ -257,4 +293,133 @@ public class AsterixClusterProperties {
// virtual cluster without cluster config file
return DEFAULT_STORAGE_DIR_NAME;
}
-}
+
+ private synchronized void requestPartitionsTakeover(String failedNodeId) {
+ //replica -> list of partitions to takeover
+ Map<String, List<Integer>> partitionRecoveryPlan = new HashMap<>();
+ AsterixReplicationProperties replicationProperties = AsterixAppContextInfo.getInstance()
+ .getReplicationProperties();
+
+ //collect the partitions of the failed NC
+ List<ClusterPartition> lostPartitions = getNodeAssignedPartitions(failedNodeId);
+ for (ClusterPartition partition : lostPartitions) {
+ //find replicas for this partitions
+ Set<String> partitionReplicas = replicationProperties.getNodeReplicasIds(partition.getNodeId());
+ //find a replica that is still active
+ for (String replica : partitionReplicas) {
+ //TODO (mhubail) currently this assigns the partition to the first found active replica.
+ //It needs to be modified to consider load balancing.
+ if (ncConfiguration.containsKey(replica)) {
+ if (!partitionRecoveryPlan.containsKey(replica)) {
+ List<Integer> replicaPartitions = new ArrayList<>();
+ replicaPartitions.add(partition.getPartitionId());
+ partitionRecoveryPlan.put(replica, replicaPartitions);
+ } else {
+ partitionRecoveryPlan.get(replica).add(partition.getPartitionId());
+ }
+ }
+ }
+ }
+
+ ICCMessageBroker messageBroker = (ICCMessageBroker) AsterixAppContextInfo.getInstance()
+ .getCCApplicationContext().getMessageBroker();
+ //For each replica, send a request to takeover the assigned partitions
+ for (String replica : partitionRecoveryPlan.keySet()) {
+ Integer[] partitionsToTakeover = partitionRecoveryPlan.get(replica).toArray(new Integer[] {});
+ long requestId = takeoverRequestId++;
+ TakeoverPartitionsRequestMessage takeoverRequest = new TakeoverPartitionsRequestMessage(requestId, replica,
+ failedNodeId, partitionsToTakeover);
+ pendingTakeoverRequests.put(requestId, takeoverRequest);
+ try {
+ messageBroker.sendApplicationMessageToNC(takeoverRequest, replica);
+ } catch (Exception e) {
+ /**
+ * if we fail to send the request, it means the NC we tried to send the request to
+ * has failed. When the failure notification arrives, we will send any pending request
+ * that belongs to the failed NC to a different active replica.
+ */
+ LOGGER.warning("Failed to send takeover request: " + takeoverRequest);
+ e.printStackTrace();
+ }
+ }
+ }
+
+ private synchronized List<ClusterPartition> getNodeAssignedPartitions(String nodeId) {
+ List<ClusterPartition> nodePartitions = new ArrayList<>();
+ for (ClusterPartition partition : clusterPartitions.values()) {
+ if (partition.getActiveNodeId().equals(nodeId)) {
+ nodePartitions.add(partition);
+ }
+ }
+ /**
+ * if there is any pending takeover request that this node was supposed to handle,
+ * it needs to be sent to a different replica
+ */
+ List<Long> failedTakeoverRequests = new ArrayList<>();
+ for (TakeoverPartitionsRequestMessage request : pendingTakeoverRequests.values()) {
+ if (request.getNodeId().equals(nodeId)) {
+ for (Integer partitionId : request.getPartitions()) {
+ nodePartitions.add(clusterPartitions.get(partitionId));
+ }
+ failedTakeoverRequests.add(request.getId());
+ }
+ }
+
+ //remove failed requests
+ for (Long requestId : failedTakeoverRequests) {
+ pendingTakeoverRequests.remove(requestId);
+ }
+
+ return nodePartitions;
+ }
+
+ private synchronized void requestMetadataNodeTakeover() {
+ //need a new node to takeover metadata node
+ ClusterPartition metadataPartiton = AsterixAppContextInfo.getInstance().getMetadataProperties()
+ .getMetadataPartition();
+ //request the metadataPartition node to register itself as the metadata node
+ TakeoverMetadataNodeRequestMessage takeoverRequest = new TakeoverMetadataNodeRequestMessage();
+ ICCMessageBroker messageBroker = (ICCMessageBroker) AsterixAppContextInfo.getInstance()
+ .getCCApplicationContext().getMessageBroker();
+ try {
+ messageBroker.sendApplicationMessageToNC(takeoverRequest, metadataPartiton.getActiveNodeId());
+ } catch (Exception e) {
+ /**
+ * if we fail to send the request, it means the NC we tried to send the request to
+ * has failed. When the failure notification arrives, a new NC will be assigned to
+ * the metadata partition and a new metadata node takeover request will be sent to it.
+ */
+ LOGGER.warning("Failed to send metadata node takeover request to: " + metadataPartiton.getActiveNodeId());
+ e.printStackTrace();
+ }
+ }
+
+ public synchronized void processPartitionTakeoverResponse(TakeoverPartitionsResponseMessage reponse) {
+ for (Integer partitonId : reponse.getPartitions()) {
+ ClusterPartition partition = clusterPartitions.get(partitonId);
+ partition.setActive(true);
+ partition.setActiveNodeId(reponse.getNodeId());
+ }
+ pendingTakeoverRequests.remove(reponse.getRequestId());
+ resetClusterPartitionConstraint();
+ updateClusterState();
+ }
+
+ public synchronized void processMetadataNodeTakeoverResponse(TakeoverMetadataNodeResponseMessage reponse) {
+ currentMetadataNode = reponse.getNodeId();
+ isMetadataNodeActive = true;
+ LOGGER.info("Current metadata node: " + currentMetadataNode);
+ updateClusterState();
+ }
+
+ public synchronized String getCurrentMetadataNode() {
+ return currentMetadataNode;
+ }
+
+ public boolean isAutoFailoverEnabled() {
+ if (cluster != null && cluster.getDataReplication() != null && cluster.getDataReplication().isEnabled()) {
+ return cluster.getDataReplication().isAutoFailover();
+ }
+ return false;
+ }
+}
\ No newline at end of file