You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/04/24 20:14:28 UTC

[1/2] git commit: Integrated log storage into master.

Repository: mesos
Updated Branches:
  refs/heads/master 397dc813e -> f0c952c70


Integrated log storage into master.

Review: https://reviews.apache.org/r/20572/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f0c952c7
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f0c952c7
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f0c952c7

Branch: refs/heads/master
Commit: f0c952c700ab916f5c4fe71961274abbfe9379ec
Parents: 025e469
Author: Vinod Kone <vi...@twitter.com>
Authored: Tue Apr 22 12:55:07 2014 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Thu Apr 24 11:14:19 2014 -0700

----------------------------------------------------------------------
 src/Makefile.am                         |   3 +-
 src/local/local.cpp                     |  27 +++++-
 src/master/constants.cpp                |   3 +-
 src/master/constants.hpp                |  12 +++
 src/master/flags.hpp                    |  56 +++++++++---
 src/master/main.cpp                     |  65 ++++++++++++--
 src/master/master.cpp                   |  12 ++-
 src/state/log.hpp                       |   2 +
 src/tests/balloon_framework_test.sh     |  17 ++--
 src/tests/cluster.hpp                   |  55 +++++++++---
 src/tests/mesos.cpp                     |   9 +-
 src/tests/mesos.hpp                     |  11 +++
 src/tests/registrar_zookeeper_tests.cpp | 126 +++++++++++++++++++++++++++
 src/tests/script.cpp                    |   3 +
 src/zookeeper/group.cpp                 |   8 +-
 15 files changed, 361 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 364d63b..9716cfd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -954,7 +954,8 @@ if HAS_JAVA
                          tests/zookeeper_test_server.cpp		\
                          tests/zookeeper_tests.cpp			\
                          tests/group_tests.cpp				\
-                         tests/allocator_zookeeper_tests.cpp
+                         tests/allocator_zookeeper_tests.cpp		\
+                         tests/registrar_zookeeper_tests.cpp
   mesos_tests_CPPFLAGS += $(JAVA_CPPFLAGS)
   mesos_tests_CPPFLAGS += -DZOOKEEPER_VERSION=\"$(ZOOKEEPER_VERSION)\"
   mesos_tests_LDFLAGS = $(AM_LDFLAGS)

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/local/local.cpp
----------------------------------------------------------------------
diff --git a/src/local/local.cpp b/src/local/local.cpp
index 297f35b..c9ebadc 100644
--- a/src/local/local.cpp
+++ b/src/local/local.cpp
@@ -17,9 +17,12 @@
  */
 
 #include <map>
+#include <set>
 #include <sstream>
 #include <vector>
 
+#include <process/pid.hpp>
+
 #include <stout/exit.hpp>
 #include <stout/foreach.hpp>
 #include <stout/path.hpp>
@@ -45,11 +48,12 @@
 #include "slave/slave.hpp"
 
 #include "state/in_memory.hpp"
-#include "state/leveldb.hpp"
+#include "state/log.hpp"
 #include "state/protobuf.hpp"
 #include "state/storage.hpp"
 
 using namespace mesos::internal;
+using namespace mesos::internal::log;
 
 using mesos::internal::master::allocator::Allocator;
 using mesos::internal::master::allocator::AllocatorProcess;
@@ -67,6 +71,7 @@ using process::PID;
 using process::UPID;
 
 using std::map;
+using std::set;
 using std::string;
 using std::stringstream;
 using std::vector;
@@ -78,6 +83,7 @@ namespace local {
 
 static Allocator* allocator = NULL;
 static AllocatorProcess* allocatorProcess = NULL;
+static Log* log = NULL;
 static state::Storage* storage = NULL;
 static state::protobuf::State* state = NULL;
 static Registrar* registrar = NULL;
@@ -117,7 +123,23 @@ PID<Master> launch(const Flags& flags, Allocator* _allocator)
     }
 
     if (flags.registry == "in_memory") {
+      if (flags.registry_strict) {
+        EXIT(1) << "Cannot use '--registry_strict' when using in-memory storage"
+                << " based registry";
+      }
       storage = new state::InMemoryStorage();
+    } else if (flags.registry == "log_storage") {
+      if (flags.work_dir.isNone()) {
+        EXIT(1) << "Need to specify --work_dir for log storage based registry";
+      }
+
+      // TODO(vinod): Add support for log storage with ZooKeeper.
+      log = new Log(
+          1,
+          path::join(flags.work_dir.get(), "log_storage"),
+          set<UPID>(),
+          flags.log_auto_initialize);
+      storage = new state::LogStorage(log);
     } else {
       EXIT(1) << "'" << flags.registry << "' is not a supported"
               << " option for registry persistence";
@@ -220,6 +242,9 @@ void shutdown()
 
     delete storage;
     storage = NULL;
+
+    delete log;
+    log = NULL;
   }
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/master/constants.cpp
----------------------------------------------------------------------
diff --git a/src/master/constants.cpp b/src/master/constants.cpp
index ed966bc..e9e5e67 100644
--- a/src/master/constants.cpp
+++ b/src/master/constants.cpp
@@ -33,6 +33,7 @@ const double MIN_CPUS = 0.1;
 const Bytes MIN_MEM = Megabytes(32);
 const Duration SLAVE_PING_TIMEOUT = Seconds(15);
 const uint32_t MAX_SLAVE_PING_TIMEOUTS = 5;
+const Duration MIN_SLAVE_REREGISTER_TIMEOUT = Minutes(10);
 const double RECOVERY_SLAVE_REMOVAL_PERCENT_LIMIT = 1.0; // 100%.
 const size_t MAX_DEACTIVATED_SLAVES = 100000;
 const uint32_t MAX_COMPLETED_FRAMEWORKS = 50;
@@ -40,7 +41,7 @@ const uint32_t MAX_COMPLETED_TASKS_PER_FRAMEWORK = 1000;
 const Duration WHITELIST_WATCH_INTERVAL = Seconds(5);
 const uint32_t TASK_LIMIT = 100;
 const std::string MASTER_INFO_LABEL = "info";
-
+const Duration ZOOKEEPER_SESSION_TIMEOUT = Seconds(10);
 } // namespace mesos {
 } // namespace internal {
 } // namespace master {

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/master/constants.hpp
----------------------------------------------------------------------
diff --git a/src/master/constants.hpp b/src/master/constants.hpp
index 27ae4f8..2daa9b0 100644
--- a/src/master/constants.hpp
+++ b/src/master/constants.hpp
@@ -38,6 +38,9 @@ namespace master {
 // So we've moved these to have external linkage but perhaps in the future
 // we can revert this.
 
+// TODO(vinod): Move constants that are only used in flags to
+// 'master/flags.hpp'.
+
 // Maximum number of slot offers to have outstanding for each framework.
 extern const int MAX_OFFERS_PER_FRAMEWORK;
 
@@ -53,6 +56,11 @@ extern const Duration SLAVE_PING_TIMEOUT;
 // Maximum number of ping timeouts until slave is considered failed.
 extern const uint32_t MAX_SLAVE_PING_TIMEOUTS;
 
+// The minimum timeout that can be used by a newly elected leader to
+// allow re-registration of slaves. Any slaves that do not re-register
+// within this timeout will be shut down.
+extern const Duration MIN_SLAVE_REREGISTER_TIMEOUT;
+
 // Default limit on the percentage of slaves that will be removed
 // after recovering if no re-registration attempts were made.
 // TODO(bmahler): There's no value here that works for all setups.
@@ -84,6 +92,10 @@ extern const uint32_t TASK_LIMIT;
 // Label used by the Leader Contender and Detector.
 extern const std::string MASTER_INFO_LABEL;
 
+// Default ZooKeeper session timeout (see '--zk_session_timeout').
+// TODO(vinod): Master detector/contender should use this timeout.
+extern const Duration ZOOKEEPER_SESSION_TIMEOUT;
+
 } // namespace mesos {
 } // namespace internal {
 } // namespace master {

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/master/flags.hpp
----------------------------------------------------------------------
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index c83292f..26a0492 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -54,16 +54,27 @@ public:
 
     add(&Flags::work_dir,
         "work_dir",
-        "Where to store master specific files\n",
-        "/tmp/mesos");
+        "Where to store the persistent information stored in the Registry.");
 
-    // TODO(bmahler): Add replicated log backed registry.
     add(&Flags::registry,
         "registry",
         "Persistence strategy for the registry;\n"
-        "available options are 'in_memory'.",
+        "available options are 'in_memory', 'log_storage'.",
         "in_memory");
 
+    // TODO(vinod): Instead of specifying the quorum size consider
+    // specifying the number of masters or the list of masters.
+    add(&Flags::quorum,
+        "quorum",
+        "The size of the quorum of replicas when using 'log_storage' based\n"
+        "Registry. It is imperative to set this value to be a majority of\n"
+        "masters i.e., quorum > (number of masters)/2.");
+
+    add(&Flags::zk_session_timeout,
+        "zk_session_timeout",
+        "ZooKeeper session timeout.",
+        ZOOKEEPER_SESSION_TIMEOUT);
+
     // TODO(bmahler): Set the default to true in 0.20.0.
     add(&Flags::registry_strict,
         "registry_strict",
@@ -71,7 +82,9 @@ public:
         "information stored in the Registry. Setting this to false means\n"
         "that the Registrar will never reject the admission, readmission,\n"
         "or removal of a slave. Consequently, 'false' can be used to\n"
-        "bootstrap the persistent state on a running cluster.",
+        "bootstrap the persistent state on a running cluster.\n"
+        "NOTE: This flag is *experimental* and should not be used in\n"
+        "production yet.",
         false);
 
     add(&Flags::registry_fetch_timeout,
@@ -86,6 +99,23 @@ public:
         "after which the operation is considered a failure.",
         Seconds(5));
 
+    add(&Flags::log_auto_initialize,
+        "log_auto_initialize",
+        "Whether to automatically initialize the log storage used by the\n"
+        "Registry. If this is set to false, the log has to be manually\n"
+        "initialized when used for the very first time.",
+        true);
+
+    add(&Flags::slave_reregister_timeout,
+        "slave_reregister_timeout",
+        "The timeout within which all slaves are expected to re-register\n"
+        "when a new master is elected as the leader. Slaves that do not\n"
+        "re-register within the timeout will be removed from the registry\n"
+        "and will be shutdown if they attempt to communicate with master.\n"
+        "NOTE: This value has to be atleast " +
+        stringify(MIN_SLAVE_REREGISTER_TIMEOUT) + ".",
+        MIN_SLAVE_REREGISTER_TIMEOUT);
+
     // TODO(bmahler): Add a 'Percentage' abstraction for flags.
     // TODO(bmahler): Add a --production flag for production defaults.
     add(&Flags::recovery_slave_removal_limit,
@@ -113,7 +143,7 @@ public:
         "whitelist",
         "Path to a file with a list of slaves\n"
         "(one per line) to advertise offers for.\n"
-        "Path could be of the form 'file:///path/to/file' or '/path/to/file'",
+        "Path could be of the form 'file:///path/to/file' or '/path/to/file'.",
         "*");
 
     add(&Flags::user_sorter,
@@ -127,19 +157,19 @@ public:
         "framework_sorter",
         "Policy to use for allocating resources\n"
         "between a given user's frameworks. Options\n"
-        "are the same as for user_allocator",
+        "are the same as for user_allocator.",
         "drf");
 
     add(&Flags::allocation_interval,
         "allocation_interval",
         "Amount of time to wait between performing\n"
-        " (batch) allocations (e.g., 500ms, 1sec, etc)",
+        " (batch) allocations (e.g., 500ms, 1sec, etc).",
         Seconds(1));
 
     add(&Flags::cluster,
         "cluster",
         "Human readable name for the cluster,\n"
-        "displayed in the webui");
+        "displayed in the webui.");
 
     add(&Flags::roles,
         "roles",
@@ -164,17 +194,21 @@ public:
         "credentials",
         "Path to a file with a list of credentials.\n"
         "Each line contains a 'principal' and 'secret' separated by whitespace.\n"
-        "Path could be of the form 'file:///path/to/file' or '/path/to/file'");
+        "Path could be of the form 'file:///path/to/file' or '/path/to/file'.");
   }
 
   bool version;
   Option<std::string> hostname;
   bool root_submissions;
-  std::string work_dir;
+  Option<std::string> work_dir;
   std::string registry;
+  Option<int> quorum;
+  Duration zk_session_timeout;
   bool registry_strict;
   Duration registry_fetch_timeout;
   Duration registry_store_timeout;
+  bool log_auto_initialize;
+  Duration slave_reregister_timeout;
   std::string recovery_slave_removal_limit;
   std::string webui_dir;
   std::string whitelist;

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/master/main.cpp
----------------------------------------------------------------------
diff --git a/src/master/main.cpp b/src/master/main.cpp
index ec23781..487db20 100644
--- a/src/master/main.cpp
+++ b/src/master/main.cpp
@@ -18,8 +18,12 @@
 
 #include <stdint.h>
 
+#include <set>
+
 #include <mesos/mesos.hpp>
 
+#include <process/pid.hpp>
+
 #include <stout/check.hpp>
 #include <stout/exit.hpp>
 #include <stout/flags.hpp>
@@ -46,6 +50,7 @@
 #include "master/repairer.hpp"
 
 #include "state/in_memory.hpp"
+#include "state/log.hpp"
 #include "state/protobuf.hpp"
 #include "state/storage.hpp"
 
@@ -53,14 +58,18 @@
 #include "zookeeper/detector.hpp"
 
 using namespace mesos::internal;
+using namespace mesos::internal::log;
 using namespace mesos::internal::master;
 using namespace zookeeper;
 
 using mesos::MasterInfo;
 
+using process::UPID;
+
 using std::cerr;
 using std::cout;
 using std::endl;
+using std::set;
 using std::string;
 
 
@@ -94,15 +103,14 @@ int main(int argc, char** argv)
   uint16_t port;
   flags.add(&port, "port", "Port to listen on", MasterInfo().port());
 
-  string zk;
+  Option<string> zk;
   flags.add(&zk,
             "zk",
             "ZooKeeper URL (used for leader election amongst masters)\n"
             "May be one of:\n"
             "  zk://host1:port1,host2:port2,.../path\n"
             "  zk://username:password@host1:port1,host2:port2,.../path\n"
-            "  file://path/to/file (where file contains one of the above)",
-            "");
+            "  file://path/to/file (where file contains one of the above)");
 
   bool help;
   flags.add(&help,
@@ -157,9 +165,49 @@ int main(int argc, char** argv)
     new allocator::Allocator(allocatorProcess);
 
   state::Storage* storage = NULL;
+  Log* log = NULL;
 
   if (flags.registry == "in_memory") {
+    if (flags.registry_strict) {
+      EXIT(1) << "Cannot use '--registry_strict' when using in-memory storage"
+              << " based registry";
+    }
     storage = new state::InMemoryStorage();
+  } else if (flags.registry == "log_storage") {
+    if (flags.work_dir.isNone()) {
+      EXIT(1) << "Need to specify --work_dir for log storage based registry";
+    }
+
+    if (zk.isSome()) {
+      // Use log storage with ZooKeeper.
+      if (flags.quorum.isNone()) {
+        EXIT(1) << "Need to specify --quorum for log storage based registry"
+                << " when using ZooKeeper";
+      }
+
+      // TODO(vinod): Add support for "--zk=file://" for log storage.
+      Try<URL> url = URL::parse(zk.get());
+      if (url.isError()) {
+        EXIT(1) << "Error parsing ZooKeeper URL: " << url.error();
+      }
+
+      log = new Log(
+          flags.quorum.get(),
+          path::join(flags.work_dir.get(), "log_storage"),
+          url.get().servers,
+          flags.zk_session_timeout,
+          path::join(url.get().path, "log_replicas"),
+          url.get().authentication,
+          flags.log_auto_initialize);
+    } else {
+      // Use log storage without ZooKeeper.
+      log = new Log(
+          1,
+          path::join(flags.work_dir.get(), "log_storage"),
+          set<UPID>(),
+          flags.log_auto_initialize);
+    }
+    storage = new state::LogStorage(log);
   } else {
     EXIT(1) << "'" << flags.registry << "' is not a supported"
             << " option for registry persistence";
@@ -176,13 +224,17 @@ int main(int argc, char** argv)
   MasterContender* contender;
   MasterDetector* detector;
 
-  Try<MasterContender*> contender_ = MasterContender::create(zk);
+  // TODO(vinod): 'MasterContender::create()' should take
+  // Option<string>.
+  Try<MasterContender*> contender_ = MasterContender::create(zk.get(""));
   if (contender_.isError()) {
     EXIT(1) << "Failed to create a master contender: " << contender_.error();
   }
   contender = contender_.get();
 
-  Try<MasterDetector*> detector_ = MasterDetector::create(zk);
+  // TODO(vinod): 'MasterDetector::create()' should take
+  // Option<string>.
+  Try<MasterDetector*> detector_ = MasterDetector::create(zk.get(""));
   if (detector_.isError()) {
     EXIT(1) << "Failed to create a master detector: " << detector_.error();
   }
@@ -200,7 +252,7 @@ int main(int argc, char** argv)
       detector,
       flags);
 
-  if (zk == "") {
+  if (zk.isNone()) {
     // It means we are using the standalone detector so we need to
     // appoint this Master as the leader.
     dynamic_cast<StandaloneMasterDetector*>(detector)->appoint(master->info());
@@ -217,6 +269,7 @@ int main(int argc, char** argv)
   delete repairer;
   delete state;
   delete storage;
+  delete log;
 
   delete contender;
   delete detector;

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 0335b34..e66d07e 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -264,8 +264,12 @@ void Master::initialize()
   LOG(INFO) << "Master " << info_.id() << " (" << info_.hostname() << ")"
             << " started on " << string(self()).substr(7);
 
-  if (flags.registry_strict) {
-    EXIT(1) << "Cannot run with --registry_strict; currently not supported";
+  // NOTE: We enforce a minimum slave re-register timeout because the
+  // slave bounds its (re-)registration retries based on the minimum.
+  if (flags.slave_reregister_timeout < MIN_SLAVE_REREGISTER_TIMEOUT) {
+    EXIT(1) << "Invalid value '" << flags.slave_reregister_timeout << "' "
+            << "for --slave_reregister_timeout: "
+            << "Must be at least " << MIN_SLAVE_REREGISTER_TIMEOUT;
   }
 
   // Parse the percentage for the slave removal limit.
@@ -791,7 +795,7 @@ Future<Nothing> Master::_recover(const Registry& registry)
   // not respond to health checks.
   // TODO(bmahler): Consider making this configurable.
   slaves.recoveredTimer =
-    delay(SLAVE_PING_TIMEOUT * MAX_SLAVE_PING_TIMEOUTS,
+    delay(flags.slave_reregister_timeout,
           self(),
           &Self::recoveredSlavesTimeout,
           registry);
@@ -799,7 +803,7 @@ Future<Nothing> Master::_recover(const Registry& registry)
   // Recovery is now complete!
   LOG(INFO) << "Recovered " << registry.slaves().slaves().size() << " slaves"
             << " from the Registry (" << Bytes(registry.ByteSize()) << ")"
-            << " ; allowing " << SLAVE_PING_TIMEOUT * MAX_SLAVE_PING_TIMEOUTS
+            << " ; allowing " << flags.slave_reregister_timeout
             << " for slaves to re-register";
 
   return Nothing();

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/state/log.hpp
----------------------------------------------------------------------
diff --git a/src/state/log.hpp b/src/state/log.hpp
index e25d1e5..6bd054f 100644
--- a/src/state/log.hpp
+++ b/src/state/log.hpp
@@ -9,6 +9,8 @@
 #include <stout/option.hpp>
 #include <stout/uuid.hpp>
 
+#include "log/log.hpp"
+
 #include "messages/state.hpp"
 
 #include "state/storage.hpp"

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/tests/balloon_framework_test.sh
----------------------------------------------------------------------
diff --git a/src/tests/balloon_framework_test.sh b/src/tests/balloon_framework_test.sh
index f832407..eb18b89 100755
--- a/src/tests/balloon_framework_test.sh
+++ b/src/tests/balloon_framework_test.sh
@@ -24,7 +24,7 @@ fi
 
 MASTER_PID=
 SLAVE_PID=
-SLAVE_WORK_DIR=
+MESOS_WORK_DIR=
 
 # This function ensures that we first kill the slave (if present) and
 # then cleanup the cgroups. This is necessary because a running slave
@@ -52,8 +52,8 @@ function cleanup() {
    rmdir ${TEST_CGROUP_HIERARCHY}/${TEST_CGROUP_ROOT} && sleep 1 && umount ${TEST_CGROUP_HIERARCHY} && rmdir ${TEST_CGROUP_HIERARCHY}
   fi
 
-  if [[ -d "${SLAVE_WORK_DIR}" ]]; then
-    rm -rf ${SLAVE_WORK_DIR};
+  if [[ -d "${MESOS_WORK_DIR}" ]]; then
+    rm -rf ${MESOS_WORK_DIR};
   fi
 }
 
@@ -71,8 +71,13 @@ unset MESOS_SOURCE_DIR
 #unset MESOS_LAUNCHER_DIR # leave this so we can find mesos-fetcher.
 unset MESOS_VERBOSE
 
+MESOS_WORK_DIR=`mktemp -d -t mesos-XXXXXX`
+
 # Launch master.
-${MASTER} --ip=127.0.0.1 --port=5432 &
+${MASTER} \
+    --ip=127.0.0.1 \
+    --port=5432 \
+    --work_dir=${MESOS_WORK_DIR} &
 MASTER_PID=${!}
 echo "${GREEN}Launched master at ${MASTER_PID}${NORMAL}"
 sleep 2
@@ -87,10 +92,8 @@ fi
 
 
 # Launch slave.
-SLAVE_WORK_DIR=`mktemp -d -t mesos-XXXXXX`
-
 ${SLAVE} \
-    --work_dir=${SLAVE_WORK_DIR} \
+    --work_dir=${MESOS_WORK_DIR} \
     --master=127.0.0.1:5432 \
     --isolation=cgroups/mem \
     --cgroups_hierarchy=${TEST_CGROUP_HIERARCHY} \

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/tests/cluster.hpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.hpp b/src/tests/cluster.hpp
index 1862fe8..5e905f5 100644
--- a/src/tests/cluster.hpp
+++ b/src/tests/cluster.hpp
@@ -288,18 +288,49 @@ inline Try<process::PID<master::Master> > Cluster::Masters::start(
         new master::allocator::Allocator(allocatorProcess.get());
   }
 
-  // TODO(bmahler): Add flag support for the replicated log and then
-  // just construct based on the flags.
-  log::tool::Initialize initializer;
-
-  initializer.flags.path = path::join(flags.work_dir, ".log");
-  initializer.execute();
-
-  master.log = new log::Log(
-      1,
-      initializer.flags.path.get(),
-      std::set<process::UPID>());
-  master.storage = new state::LogStorage(master.log);
+  if (flags.registry == "in_memory") {
+    if (flags.registry_strict) {
+      return Error(
+          "Cannot use '--registry_strict' when using in-memory storage based"
+          " registry");
+    }
+    master.storage = new state::InMemoryStorage();
+  } else if (flags.registry == "log_storage") {
+    if (flags.work_dir.isNone()) {
+      return Error(
+          "Need to specify --work_dir for log storage based registry");
+    }
+
+    if (url.isSome()) {
+      // Use ZooKeeper based log storage.
+      if (flags.quorum.isNone()) {
+        return Error(
+            "Need to specify --quorum for log storage based registry when using"
+            " ZooKeeper");
+      }
+      master.log = new log::Log(
+          flags.quorum.get(),
+          path::join(flags.work_dir.get(), "log_storage"),
+          url.get().servers,
+          flags.zk_session_timeout,
+          path::join(url.get().path, "log_replicas"),
+          url.get().authentication,
+          flags.log_auto_initialize);
+    } else {
+      master.log = new log::Log(
+          1,
+          path::join(flags.work_dir.get(), "log_storage"),
+          std::set<process::UPID>(),
+          flags.log_auto_initialize);
+    }
+
+    master.storage = new state::LogStorage(master.log);
+  } else {
+    return Error("'" + flags.registry + "' is not a supported option for"
+                 " registry persistence");
+  }
+
+  CHECK_NOTNULL(master.storage);
 
   master.state = new state::protobuf::State(master.storage);
   master.registrar = new master::Registrar(flags, master.state);

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index b9388df..ae926c9 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -37,15 +37,15 @@ master::Flags MesosTest::CreateMasterFlags()
 {
   master::Flags flags;
 
-  // Enable authentication.
-  flags.authenticate = true;
-
   // Create a temporary work directory (removed by Environment).
   Try<std::string> directory = environment->mkdtemp();
   CHECK_SOME(directory) << "Failed to create temporary directory";
 
   flags.work_dir = directory.get();
 
+  // Enable authentication.
+  flags.authenticate = true;
+
   // Create a default credentials file.
   const std::string& path = path::join(directory.get(), "credentials");
 
@@ -57,6 +57,9 @@ master::Flags MesosTest::CreateMasterFlags()
 
   flags.credentials = "file://" + path;
 
+  // Use log storage (without ZooKeeper) by default.
+  flags.registry = "log_storage";
+
   return flags;
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/tests/mesos.hpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.hpp b/src/tests/mesos.hpp
index 0c5b570..680a648 100644
--- a/src/tests/mesos.hpp
+++ b/src/tests/mesos.hpp
@@ -230,6 +230,17 @@ public:
 protected:
   MesosZooKeeperTest() : MesosTest(url) {}
 
+  virtual master::Flags CreateMasterFlags()
+  {
+    master::Flags flags = MesosTest::CreateMasterFlags();
+
+    // NOTE: Since we are using ZooKeeper with log storage (default
+    // storage in MesosTest) we need to specify a quorum size.
+    flags.quorum = 1;
+
+    return flags;
+  }
+
   static ZooKeeperTestServer* server;
   static Option<zookeeper::URL> url;
 };

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/tests/registrar_zookeeper_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/registrar_zookeeper_tests.cpp b/src/tests/registrar_zookeeper_tests.cpp
new file mode 100644
index 0000000..376ac1e
--- /dev/null
+++ b/src/tests/registrar_zookeeper_tests.cpp
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gmock/gmock.h>
+
+#include <string>
+#include <vector>
+
+#include <process/future.hpp>
+#include <process/gmock.hpp>
+#include <process/message.hpp>
+
+#include <stout/option.hpp>
+#include <stout/try.hpp>
+
+#include "master/flags.hpp"
+#include "master/master.hpp"
+
+#include "tests/containerizer.hpp"
+#include "tests/mesos.hpp"
+
+using namespace mesos;
+using namespace mesos::internal;
+using namespace mesos::internal::tests;
+
+using mesos::internal::master::Master;
+
+using mesos::internal::slave::Slave;
+
+using process::Future;
+using process::PID;
+
+using std::string;
+using std::vector;
+
+using testing::_;
+using testing::AtMost;
+using testing::DoAll;
+using testing::Return;
+
+
+class RegistrarZooKeeperTest : public MesosZooKeeperTest {};
+
+
+// This is a simple end-to-end test that makes sure a master using log
+// storage with ZooKeeper can successfully launch a task.
+TEST_F(RegistrarZooKeeperTest, TaskRunning)
+{
+  Try<PID<Master> > master = StartMaster();
+  ASSERT_SOME(master);
+
+  MockExecutor exec(DEFAULT_EXECUTOR_ID);
+
+  TestContainerizer containerizer(&exec);
+
+  Try<PID<Slave> > slave = StartSlave(&containerizer);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _))
+    .Times(1);
+
+  Future<vector<Offer> > offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+  EXPECT_NE(0u, offers.get().size());
+
+  TaskInfo task = createTask(offers.get()[0], "dummy", DEFAULT_EXECUTOR_ID);
+  vector<TaskInfo> tasks;
+  tasks.push_back(task);
+
+  EXPECT_CALL(exec, registered(_, _, _, _))
+    .Times(1);
+
+  EXPECT_CALL(exec, launchTask(_, _))
+    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
+
+  Future<Nothing> resourcesUpdated;
+  Future<Nothing> update;
+  EXPECT_CALL(containerizer,
+              update(_, Resources(offers.get()[0].resources())))
+    .WillOnce(DoAll(FutureSatisfy(&resourcesUpdated),
+                    Return(update)));
+
+  Future<TaskStatus> status;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status));
+
+  driver.launchTasks(offers.get()[0].id(), tasks);
+
+  AWAIT_READY(status);
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
+
+  AWAIT_READY(resourcesUpdated);
+
+  EXPECT_CALL(exec, shutdown(_))
+    .Times(AtMost(1));
+
+  driver.stop();
+  driver.join();
+
+  Shutdown(); // Must shutdown before 'containerizer' gets deallocated.
+}

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/tests/script.cpp
----------------------------------------------------------------------
diff --git a/src/tests/script.cpp b/src/tests/script.cpp
index 09c7f3b..e427572 100644
--- a/src/tests/script.cpp
+++ b/src/tests/script.cpp
@@ -106,6 +106,9 @@ void execute(const string& script)
     os::setenv("MESOS_WEBUI_DIR", path::join(flags.source_dir, "src", "webui"));
     os::setenv("MESOS_LAUNCHER_DIR", path::join(flags.build_dir, "src"));
 
+    // Enable log storage based registry.
+    os::setenv("MESOS_REGISTRY", "log_storage");
+
     // Enable authentication.
     os::setenv("MESOS_AUTHENTICATE", "true");
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/f0c952c7/src/zookeeper/group.cpp
----------------------------------------------------------------------
diff --git a/src/zookeeper/group.cpp b/src/zookeeper/group.cpp
index bdebc48..7097298 100644
--- a/src/zookeeper/group.cpp
+++ b/src/zookeeper/group.cpp
@@ -703,9 +703,13 @@ Try<bool> GroupProcess::cache()
     Try<int32_t> sequence = numify<int32_t>(tokens.back());
 
     // Skip it if it couldn't be converted to a number.
+    // NOTE: This is currently possible when using a log storage
+    // based registry because the log replicas register under a
+    // "log_replicas" child znode of the same ZooKeeper path that
+    // holds the masters' ephemeral znodes.
     if (sequence.isError()) {
-      LOG(WARNING) << "Found non-sequence node '" << result
-                   << "' at '" << znode << "' in ZooKeeper";
+      VLOG(1) << "Found non-sequence node '" << result
+              << "' at '" << znode << "' in ZooKeeper";
       continue;
     }
 


[2/2] git commit: Renamed and moved MasterZooKeeperTest fixture.

Posted by vi...@apache.org.
Renamed and moved MasterZooKeeperTest fixture.

Review: https://reviews.apache.org/r/20584


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/025e4695
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/025e4695
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/025e4695

Branch: refs/heads/master
Commit: 025e4695f57c6accb8581a9161290d9ae7065735
Parents: 397dc81
Author: Vinod Kone <vi...@twitter.com>
Authored: Tue Apr 22 15:55:51 2014 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Thu Apr 24 11:14:19 2014 -0700

----------------------------------------------------------------------
 src/tests/allocator_zookeeper_tests.cpp | 39 +-------------------------
 src/tests/master_tests.cpp              | 42 +---------------------------
 src/tests/mesos.cpp                     |  5 ++++
 src/tests/mesos.hpp                     | 41 +++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 79 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/025e4695/src/tests/allocator_zookeeper_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/allocator_zookeeper_tests.cpp b/src/tests/allocator_zookeeper_tests.cpp
index 9ad0fa7..1f15cf5 100644
--- a/src/tests/allocator_zookeeper_tests.cpp
+++ b/src/tests/allocator_zookeeper_tests.cpp
@@ -55,44 +55,7 @@ using testing::DoDefault;
 
 
 template <typename T = AllocatorProcess>
-class AllocatorZooKeeperTest : public MesosTest
-{
-public:
-  static void SetUpTestCase()
-  {
-    // Make sure the JVM is created.
-    ZooKeeperTest::SetUpTestCase();
-
-    // Launch the ZooKeeper test server.
-    server = new ZooKeeperTestServer();
-    server->startNetwork();
-
-    Try<zookeeper::URL> parse = zookeeper::URL::parse(
-        "zk://" + server->connectString() + "/znode");
-    ASSERT_SOME(parse);
-
-    url = parse.get();
-  }
-
-  static void TearDownTestCase()
-  {
-    delete server;
-    server = NULL;
-  }
-
-protected:
-  AllocatorZooKeeperTest() : MesosTest(url) {}
-
-  static ZooKeeperTestServer* server;
-  static Option<zookeeper::URL> url;
-};
-
-
-template <typename T>
-ZooKeeperTestServer* AllocatorZooKeeperTest<T>::server = NULL;
-
-template <typename T>
-Option<zookeeper::URL> AllocatorZooKeeperTest<T>::url;
+class AllocatorZooKeeperTest : public MesosZooKeeperTest {};
 
 
 // Runs TYPED_TEST(AllocatorZooKeeperTest, ...) on all AllocatorTypes.

http://git-wip-us.apache.org/repos/asf/mesos/blob/025e4695/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index c429432..8371df2 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -53,10 +53,6 @@
 #include "tests/containerizer.hpp"
 #include "tests/mesos.hpp"
 
-#ifdef MESOS_HAS_JAVA
-#include "tests/zookeeper.hpp"
-#endif
-
 using namespace mesos;
 using namespace mesos::internal;
 using namespace mesos::internal::tests;
@@ -1299,44 +1295,8 @@ TEST_F(MasterTest, MetricsInStatsEndpoint)
 
 
 #ifdef MESOS_HAS_JAVA
-class MasterZooKeeperTest : public MesosTest
-{
-public:
-  static void SetUpTestCase()
-  {
-    // Make sure the JVM is created.
-    ZooKeeperTest::SetUpTestCase();
-
-    // Launch the ZooKeeper test server.
-    server = new ZooKeeperTestServer();
-    server->startNetwork();
-
-    Try<zookeeper::URL> parse = zookeeper::URL::parse(
-        "zk://" + server->connectString() + "/znode");
-    ASSERT_SOME(parse);
-
-    url = parse.get();
-  }
-
-  static void TearDownTestCase()
-  {
-    delete server;
-    server = NULL;
-  }
-
-protected:
-  MasterZooKeeperTest() : MesosTest(url) {}
-
-  static ZooKeeperTestServer* server;
-  static Option<zookeeper::URL> url;
-};
-
-
-ZooKeeperTestServer* MasterZooKeeperTest::server = NULL;
-
-
-Option<zookeeper::URL> MasterZooKeeperTest::url;
 
+class MasterZooKeeperTest : public MesosZooKeeperTest {};
 
 // This test verifies that when the ZooKeeper cluster is lost,
 // master, slave & scheduler all get informed.

http://git-wip-us.apache.org/repos/asf/mesos/blob/025e4695/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index a9844e4..b9388df 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -25,6 +25,11 @@ namespace mesos {
 namespace internal {
 namespace tests {
 
+#ifdef MESOS_HAS_JAVA
+ZooKeeperTestServer* MesosZooKeeperTest::server = NULL; // Set in SetUpTestCase().
+Option<zookeeper::URL> MesosZooKeeperTest::url; // Set in SetUpTestCase().
+#endif // MESOS_HAS_JAVA
+
 MesosTest::MesosTest(const Option<zookeeper::URL>& url) : cluster(url) {}
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/025e4695/src/tests/mesos.hpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.hpp b/src/tests/mesos.hpp
index 7bc5e98..0c5b570 100644
--- a/src/tests/mesos.hpp
+++ b/src/tests/mesos.hpp
@@ -54,6 +54,10 @@
 
 #include "tests/cluster.hpp"
 
+#ifdef MESOS_HAS_JAVA
+#include "tests/zookeeper.hpp"
+#endif // MESOS_HAS_JAVA
+
 namespace mesos {
 namespace internal {
 namespace tests {
@@ -196,6 +200,43 @@ protected:
 #endif // __linux__
 
 
+#ifdef MESOS_HAS_JAVA
+
+class MesosZooKeeperTest : public MesosTest // ZooKeeper-backed test fixture.
+{
+public:
+  static void SetUpTestCase() // Starts the test server and records its URL.
+  {
+    // Make sure the JVM is created (delegated to ZooKeeperTest).
+    ZooKeeperTest::SetUpTestCase();
+
+    // Launch the ZooKeeper test server; it is deleted in TearDownTestCase().
+    server = new ZooKeeperTestServer();
+    server->startNetwork();
+
+    Try<zookeeper::URL> parse = zookeeper::URL::parse(
+        "zk://" + server->connectString() + "/znode");
+    ASSERT_SOME(parse);
+
+    url = parse.get(); // Passed to MesosTest by the constructor below.
+  }
+
+  static void TearDownTestCase() // Destroys the server created above.
+  {
+    delete server;
+    server = NULL; // Do not leave a dangling pointer behind.
+  }
+
+protected:
+  MesosZooKeeperTest() : MesosTest(url) {} // Uses the static 'url' member.
+
+  static ZooKeeperTestServer* server; // NULL before setup and after teardown.
+  static Option<zookeeper::URL> url; // URL of 'server' with path "/znode".
+};
+
+#endif // MESOS_HAS_JAVA
+
+
 // Macros to get/create (default) ExecutorInfos and FrameworkInfos.
 #define DEFAULT_EXECUTOR_INFO                                           \
       ({ ExecutorInfo executor;                                         \