You are viewing a plain text version of this content. The canonical link for it is here.
Posted to awf-commits@incubator.apache.org by sc...@apache.org on 2011/08/15 19:44:03 UTC
svn commit: r1157967 - in /incubator/deft/sandbox/demo: ./ elastica/
elastica/src/ elastica/src/main/ elastica/src/main/java/
elastica/src/main/java/org/ elastica/src/main/java/org/apache/
elastica/src/main/java/org/apache/deft/ elastica/src/main/java/...
Author: schildmeijer
Date: Mon Aug 15 19:44:02 2011
New Revision: 1157967
URL: http://svn.apache.org/viewvc?rev=1157967&view=rev
Log:
DEFT-151 - demo application using Deft (Elastica -- ("Demo framework" for building a decentralized gossip based distributed system with accrual failure detection))
Added:
incubator/deft/sandbox/demo/
incubator/deft/sandbox/demo/elastica/
incubator/deft/sandbox/demo/elastica/README
incubator/deft/sandbox/demo/elastica/src/
incubator/deft/sandbox/demo/elastica/src/main/
incubator/deft/sandbox/demo/elastica/src/main/java/
incubator/deft/sandbox/demo/elastica/src/main/java/org/
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/FailureDetector.java
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/Gossiper.java
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/MessagingService.java
incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/NodeStateChangeListener.java
Added: incubator/deft/sandbox/demo/elastica/README
URL: http://svn.apache.org/viewvc/incubator/deft/sandbox/demo/elastica/README?rev=1157967&view=auto
==============================================================================
--- incubator/deft/sandbox/demo/elastica/README (added)
+++ incubator/deft/sandbox/demo/elastica/README Mon Aug 15 19:44:02 2011
@@ -0,0 +1,28 @@
+Elastica is a "demo framework" for building a decentralized gossip based distributed system with accrual failure
+detection. It's also an implementation of: The Phi Accrual Failure Detector[1] and Amazons paper on Gossip[2]
+[1] http://ddg.jaist.ac.jp/pub/HDY+04.pdf
+[2] http://www.cs.cornell.edu/home/rvr/papers/flowgossip.pdf
+
+External Dependencies needed for Elastica (not included in Deft binary package):
+json-simple ("A simple Java toolkit for JSON") http://code.google.com/p/json-simple/ (Apache License 2.0)
+
+
+Usage:
+There are two configuration points (per node), 'seed' and 'address' you need to change before you are ready to start
+your cluster.
+They are located in the MessagingService class. The seed should be a stable node that new nodes will use for bootstrapping
+into the existing cluster. The address is you local ip and port that other nodes in the cluster will connect to.
+Your seed will have seed == address (that is how a seed is defined).
+
+Example 1.
+A two node cluster, running on the same machine
+Node A (seed node). seed = 127.0.0.1:14922, address = 127.0.0.1:14922
+Node B. seed = 127.0.0.1:14922, address = 127.0.0.1:14923
+
+Example 2
+A three node cluster, running on three different machines
+Node A (seed node). seed = 192.168.0.1:14922, address = 192.168.0.1:14922
+Node B. seed = 192.168.0.1:14922, address = 192.168.0.1:14923
+Node C. seed = 192.168.0.1:14922, address = 192.168.0.1:14924
+
+
Added: incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/FailureDetector.java
URL: http://svn.apache.org/viewvc/incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/FailureDetector.java?rev=1157967&view=auto
==============================================================================
--- incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/FailureDetector.java (added)
+++ incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/FailureDetector.java Mon Aug 15 19:44:02 2011
@@ -0,0 +1,92 @@
+package org.apache.deft.demo.elastica;
+
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Maps;
+
+/**
+ * Java implementation of 'The Phi Accrual Failure Detector' by Hayashibara et al.
+ *
+ * Failure detection is the process of determining which nodes in a distributed fault-tolerant system have failed.
+ * Original Phi Accrual Failure Detection paper: http://ddg.jaist.ac.jp/pub/HDY+04.pdf
+ *
+ * A low threshold is prone to generate many wrong suspicions but ensures a quick detection in the event of a real crash.
+ * Conversely, a high threshold generates fewer mistakes but needs more time to detect actual crashes"""
+ *
+ * (Original (python) version by Brandon Williams (github.com/driftx), modified by Roger Schildmeijer
+ * (github.com/rchildmeijer))
+ */
+
+public class FailureDetector {
+
+ private static final int MAX_SAMPLE_SIZE = 1000;
+
+ /**
+ * 1 = 10% error rate, 2 = 1%, 3 = 0.1%.., (eg threshold=3. no heartbeat for >6s => node marked as dead
+ */
+ private static final int THRESHOLD = 3;
+
+ private final Map<String, List<Long>> intervals = Maps.newHashMap();
+ private final Map<String, Map<String, Long>> hosts = Maps.newHashMap();
+ private final Map<String, Long> timestamps = Maps.newHashMap();
+
+ /**
+ * Called when host has indicated being alive (aka heartbeat)
+ * @param host format = "ip:port", eg. "192.168.0.1:14922"
+ */
+ public void heartbeat(String host) {
+ if (!timestamps.containsKey(host)) {
+ timestamps.put(host, System.currentTimeMillis());
+ intervals.put(host, new LinkedList<Long>());
+ hosts.put(host, new HashMap<String, Long>());
+ } else {
+ long now = System.currentTimeMillis();
+ long interval = now - timestamps.get(host);
+ timestamps.put(host, now);
+ intervals.get(host).add(interval);
+ if (intervals.get(host).size() > MAX_SAMPLE_SIZE) {
+ int size = intervals.get(host).size();
+ intervals.get(host).remove(size - 1);
+ }
+ if (intervals.get(host).size() > 1) {
+ long numerator = sum(intervals.get(host));
+ double denominator = intervals.get(host).size();
+ hosts.get(host).put("mean", (long) (numerator / denominator));
+ }
+
+ }
+ }
+
+ private double probability(String host, long timestamp) {
+ double exponent = -1.0 * timestamp / hosts.get(host).get("mean");
+ return 1 - (1.0 - Math.pow(Math.E, exponent));
+ }
+
+ public double phi(String host) {
+ Map<String, Long> means = hosts.get(host);
+ if (/*means == null || */means.isEmpty()) {
+ return 0.0;
+ }
+ long timestamp = System.currentTimeMillis();
+ long diff = timestamp - timestamps.get(host);
+ double prob = probability(host, diff);
+ if (Double.isNaN(prob)) { // is isNaN correct? is isZero would be more correct
+ return Math.pow(1, -128);
+ }
+ return -1 * Math.log10(prob);
+ }
+
+ public boolean isAlive(String host) { return phi(host) < THRESHOLD; }
+
+ public boolean isDead(String host) { return !isAlive(host); }
+
+ private Long sum(List<Long> values) {
+ long sum = 0;
+ for (Long value : values) sum += value;
+ return sum;
+ }
+
+}
Added: incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/Gossiper.java
URL: http://svn.apache.org/viewvc/incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/Gossiper.java?rev=1157967&view=auto
==============================================================================
--- incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/Gossiper.java (added)
+++ incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/Gossiper.java Mon Aug 15 19:44:02 2011
@@ -0,0 +1,196 @@
+package org.apache.deft.demo.elastica;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.deft.io.callback.PeriodicCallback;
+import org.apache.deft.web.AsyncCallback;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+/**
+ * This module is responsible for Gossiping cluster information. The abstraction
+ * maintains the list of live and dead nodes. Periodically i.e. every 1 second this module
+ * chooses a random node and initiates a round of Gossip with it. This module as and when it hears a gossip
+ * updates the Failure Detector with the liveness information.
+ *
+ * Amazon paper on Gossip at http://www.cs.cornell.edu/home/rvr/papers/flowgossip.pdf
+
+ * Gossip timer task runs every second.
+ * During each of these runs the node initiates gossip exchange according to following rules:
+ * 1, Gossip to random live node (if any)
+ * 2, Gossip to random unreachable node with certain probability depending on number of unreachable and live nodes
+ * 3, If the node gossiped to at (1) was not seed, or the number of live nodes is less than number of seeds,
+ * gossip to random seed with certain probability depending on number of unreachable, seed and live nodes.
+ *
+ */
+public class Gossiper {
+
+ private final FailureDetector fd;
+ private final MessagingService ms;
+ private final String seed; // seed address (e.g. 192.168.0.1:14922)
+ private final String address; // local address (e.g. 192.168.0.2:14923)
+ private final Map<String, Map<String, Long>> nodeStates = Maps.newHashMap();
+ private final List<String> aliveNodes = Lists.newLinkedList();
+ private final List<String> deadNodes = Lists.newLinkedList();
+ private final List<NodeStateChangeListener> nodeStateChangeListeners = Lists.newLinkedList();
+ private final AtomicLong version = new AtomicLong(0);
+ private long generation = System.currentTimeMillis();
+
+ public Gossiper(FailureDetector fd, MessagingService ms, String seed, String address) {
+ this.fd = fd;
+ this.ms = ms;
+ this.seed = seed;
+ this.address = address;
+ new PeriodicCallback(
+ new AsyncCallback() { public void onCallback() { inititateGossipExhange(); scrutinizeCluster(); }},
+ 1000
+ ).start();
+
+ }
+
+
+ private void inititateGossipExhange() {
+ version.incrementAndGet();
+ boolean gossipToSeed = false;
+ if (!aliveNodes.isEmpty()) {
+ gossipToSeed = sendGossip(aliveNodes);
+ }
+
+ if (!deadNodes.isEmpty()) {
+ double probability = ((double) deadNodes.size()) / (aliveNodes.size() + 1);
+ if (new Random().nextDouble() < probability) {
+ sendGossip(deadNodes);
+ }
+ }
+
+ if (!gossipToSeed || aliveNodes.size() < 1) {
+ // gossip to a seed for facilitating partition healing
+ if (seed.equals(address)) {
+ return;
+ }
+ if (aliveNodes.isEmpty()) {
+ sendGossip(Collections.singletonList(seed));
+ }
+ else {
+ double probability = 1.0 / (aliveNodes.size() + deadNodes.size());
+ if (new Random().nextDouble() <= probability) {
+ sendGossip(Collections.singletonList(seed));
+ }
+ }
+ }
+ }
+
+ /** Returns true if the gossip was sent to a seed
+ * The gossip will be sent to a randomly chosen node from the nodes list.
+ */
+ private boolean sendGossip(List<String> nodes) {
+ List<String> copy = Lists.newLinkedList(nodes);
+ Collections.shuffle(copy);
+ String node = copy.get(0);
+ Map<String, Map<String, Long>> gossip = Maps.newHashMap(nodeStates);
+ // dont gossip about dead nodes
+ for (String host : deadNodes) {
+ gossip.remove(host);
+ }
+ gossip.put(address, localState());
+ ms.sendOneWay(node, gossip);
+ return node == seed;
+ }
+
+ private Map<String, Long> localState() {
+ Map<String, Long> myState = Maps.newHashMap();
+ myState.put("generation", generation);
+ myState.put("version", version.get());
+ return myState;
+ }
+
+
+ private void scrutinizeCluster() {
+ List<String> dead = Lists.newLinkedList();
+ for (String host : aliveNodes) {
+ if (fd.isDead(host)) {
+ dead.add(host);
+ }
+ }
+
+ if (!dead.isEmpty()) {
+ deadNodes.addAll(dead);
+ aliveNodes.removeAll(dead);
+ for (String host: dead) {
+ notifyOnDead(host);
+ }
+ }
+ }
+
+ /**
+ * Invoked by the MessagingService when we receive gossip from another node in the cluster
+ */
+ public void deliverNewGossip(Map<String, Map<String, Long>> gossip, String sender) {
+ //gossip will contain info about me
+ gossip.remove(address);
+ for (String host : gossip.keySet()) {
+ Map<String, Long> digest = gossip.get(host);
+ if (nodeStates.containsKey(host)) {
+ // has digest about host, maybe update
+ if (digest.get("generation") > nodeStates.get(host).get("generation")) {
+ // node has restarted
+ if (aliveNodes.contains(host)) {
+ // we haven't marked the restarted node as dead yet (fast restart maybe)
+ // mark as dead so maintenance like resetting connection pools can occur
+ aliveNodes.remove(host);
+ deadNodes.add(host);
+ notifyOnDead(host);
+
+ }
+ updateNodeState(host, digest);
+ } else if (digest.get("version") > nodeStates.get(host).get("version") &&
+ digest.get("generation").longValue() == nodeStates.get(host).get("generation").longValue()) {
+ updateNodeState(host, digest);
+ }
+ } else {
+ // had no previous info about host
+ nodeStates.put(host, digest);
+ fd.heartbeat(host);
+ aliveNodes.add(host);
+ notifyOnJoin(host);
+ }
+ }
+ }
+
+ private void updateNodeState(String host, Map<String, Long> digest) {
+ fd.heartbeat(host);
+ nodeStates.put(host, digest);
+ if (deadNodes.contains(host)) {
+ deadNodes.remove(host);
+ aliveNodes.add(host);
+ notifyOnAlive(host);
+ }
+ }
+
+ private void notifyOnJoin(String host) {
+ System.out.println("onJoin(" + host + ")");
+ for (NodeStateChangeListener listener : nodeStateChangeListeners) {
+ listener.onJoin(host);
+ }
+ }
+
+ private void notifyOnAlive(String host) {
+ System.out.println("onAlive(" + host + ")");
+ for (NodeStateChangeListener listener : nodeStateChangeListeners) {
+ listener.onAlive (host);
+ }
+ }
+
+ private void notifyOnDead(String host) {
+ System.out.println("onDead(" + host + ")");
+ for (NodeStateChangeListener listener : nodeStateChangeListeners) {
+ listener.onDead(host);
+ }
+ }
+
+}
Added: incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/MessagingService.java
URL: http://svn.apache.org/viewvc/incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/MessagingService.java?rev=1157967&view=auto
==============================================================================
--- incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/MessagingService.java (added)
+++ incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/MessagingService.java Mon Aug 15 19:44:02 2011
@@ -0,0 +1,153 @@
+package org.apache.deft.demo.elastica;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.nio.channels.ServerSocketChannel;
+import java.nio.channels.SocketChannel;
+import java.util.Map;
+
+import org.apache.deft.io.AsynchronousSocket;
+import org.apache.deft.io.IOLoop;
+import org.apache.deft.util.AcceptUtil;
+import org.apache.deft.web.AsyncCallback;
+import org.apache.deft.web.AsyncResult;
+import org.json.simple.JSONObject;
+import org.json.simple.JSONValue;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+import com.google.common.collect.Maps;
+
+public class MessagingService {
+
+ private final String seed = "127.0.0.1:14922";
+ private final String address = "127.0.0.1:14922";
+ private final Map<String, AsynchronousSocket> sockets = Maps.newHashMap();
+ private final Gossiper gossiper;
+
+ private final AsyncCallback deafAsyncCallback = new AsyncCallback() { public void onCallback() { /*nop*/}};
+
+ public MessagingService() {
+ FailureDetector fd = new FailureDetector();
+ gossiper = new Gossiper(fd, this, seed, address);
+ bind();
+ }
+
+ public void sendOneWay(String to, Map<String, Map<String, Long>> gossip) {
+ String data = JSONValue.toJSONString(gossip) + "\r\n";
+ //System.out.println("Sending data: " + data.trim() + " to: " + to);
+ if (to.equals(address)) {
+ // handle local delivery
+ System.out.println("Local delivery..");
+ }
+ if (sockets.containsKey(to)) {
+ sockets.get(to).write(data.getBytes() , deafAsyncCallback);
+ } else {
+ connectToNode(to, data);
+ }
+ }
+
+ private void bind() {
+ try {
+ final ServerSocketChannel server = ServerSocketChannel.open();
+ server.configureBlocking(false);
+ int port = Integer.valueOf(address.split(":")[1]);
+ InetSocketAddress endpoint = new InetSocketAddress(port); // use the "any" (0.0.0.0) address
+ server.socket().bind(endpoint);
+ AcceptUtil.accept(server, new AsyncCallback() { public void onCallback() { onAccept(server);} });
+ System.out.println("Elastica node bootstrapped successfully");
+ } catch (IOException e) {
+ System.err.println("Error during bind");
+ }
+ }
+
+ private void onAccept(ServerSocketChannel server) {
+ try {
+ SocketChannel clientChannel = server.accept();
+ final AsynchronousSocket client = new AsynchronousSocket(clientChannel);
+ final String remoteHost = clientChannel.socket().getRemoteSocketAddress().toString();
+ sockets.put(remoteHost, client);
+ client.setCloseCallback(new AsyncCallback() { public void onCallback() { handleClose(remoteHost); }});
+ client.readUntil(
+ "\r\n".getBytes(),
+ new AsyncResult<byte[]>() {
+ public void onSuccess(byte[] result) { handleRead(result, remoteHost); }
+ public void onFailure(Throwable caught) { /* handleOnFailure(caught); */}
+ }
+ );
+ } catch (IOException ioe) {
+ System.err.println("Exception during client accept");
+ }
+ }
+
+ /**
+ * (JSON) format for data:
+ * {
+ * "127.0.0.1:14922" :
+ * {
+ * "version" : 1,
+ * "generation: 1234973945923
+ * },
+ * // etc
+ * }
+ *
+ */
+ private void handleRead(byte[] rawData, final String host) {
+ String data = new String(rawData).trim(); // remove the trailing "\r\n" (protocol specific)
+ //System.out.println("received data: " + data);
+ try {
+ JSONObject gossip = (JSONObject) new JSONParser().parse(data);
+ gossiper.deliverNewGossip(gossip, host);
+ } catch (ParseException e) {
+ System.err.println("ParseException in handleRead(..)");
+ }
+ sockets.get(host).readUntil(
+ "\r\n".getBytes(),
+ new AsyncResult<byte[]>() {
+ public void onSuccess(byte[] result) { handleRead(result, host); }
+ public void onFailure(Throwable caught) { /* handleOnFailure(caught); */}
+ }
+ );
+ }
+
+ private void handleClose(String host) {
+ System.out.println("handleClose(..), closing session with: " + host);
+ sockets.remove(host);
+ }
+
+ private void connectToNode(final String host, final String data) {
+ try {
+ SocketChannel sc = SocketChannel.open();
+ final AsynchronousSocket client = new AsynchronousSocket(sc);
+ String[] ipAndPort = host.split(":");
+ int port = Integer.valueOf(ipAndPort[1]);
+ client.connect(ipAndPort[0], port, new AsyncResult<Boolean>() {
+ public void onSuccess(Boolean result) { onConnect(host, data, client); }
+ public void onFailure(Throwable caught) { /* handleOnFailure(caught) */ }
+ });
+ } catch (IOException e) {
+ System.err.println("Could not connect to host: " + host);
+ }
+ }
+
+ private void onConnect(final String host, String data, AsynchronousSocket client) {
+ sockets.put(host, client);
+ client.setCloseCallback(new AsyncCallback() { public void onCallback() { handleClose(host); }});
+ client.readUntil(
+ "\r\n".getBytes(),
+ new AsyncResult<byte[]>() {
+ public void onSuccess(byte[] result) { handleRead(result, host); }
+ public void onFailure(Throwable caught) { /* handleOnFailure(caught); */}
+ }
+ );
+ if (data != null) {
+ client.write(data.getBytes(), deafAsyncCallback);
+ }
+ }
+
+ public static void main(String[] args) {
+ new MessagingService(); // bootstrap;
+ IOLoop.INSTANCE.start();
+ }
+
+}
Added: incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/NodeStateChangeListener.java
URL: http://svn.apache.org/viewvc/incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/NodeStateChangeListener.java?rev=1157967&view=auto
==============================================================================
--- incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/NodeStateChangeListener.java (added)
+++ incubator/deft/sandbox/demo/elastica/src/main/java/org/apache/deft/demo/elastica/NodeStateChangeListener.java Mon Aug 15 19:44:02 2011
@@ -0,0 +1,11 @@
+package org.apache.deft.demo.elastica;
+
+public interface NodeStateChangeListener {
+
+ void onJoin(String host);
+
+ void onAlive(String host);
+
+ void onDead(String host);
+
+}