You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2011/03/04 22:08:22 UTC
svn commit: r1078134 - in /cassandra/branches/cassandra-0.7: ./ interface/
interface/thrift/gen-java/org/apache/cassandra/thrift/
src/java/org/apache/cassandra/service/
test/unit/org/apache/cassandra/service/
Author: jbellis
Date: Fri Mar 4 21:08:21 2011
New Revision: 1078134
URL: http://svn.apache.org/viewvc?rev=1078134&view=rev
Log:
add CL.TWO, THREE
patch by Peter Schuller; reviewed by tjake for CASSANDRA-2013
Modified:
cassandra/branches/cassandra-0.7/CHANGES.txt
cassandra/branches/cassandra-0.7/interface/cassandra.thrift
cassandra/branches/cassandra-0.7/interface/thrift/gen-java/org/apache/cassandra/thrift/ConsistencyLevel.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/ReadCallback.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/WriteResponseHandler.java
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/service/ConsistencyLevelTest.java
Modified: cassandra/branches/cassandra-0.7/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/CHANGES.txt?rev=1078134&r1=1078133&r2=1078134&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/CHANGES.txt (original)
+++ cassandra/branches/cassandra-0.7/CHANGES.txt Fri Mar 4 21:08:21 2011
@@ -4,6 +4,7 @@
* initialize endpoint in gossiper earlier (CASSANDRA-2228)
* add ability to write to Cassandra from Pig (CASSANDRA-1828)
* add rpc_[min|max]_threads (CASSANDRA-2176)
+ * add CL.TWO, CL.THREE (CASSANDRA-2013)
0.7.3
Modified: cassandra/branches/cassandra-0.7/interface/cassandra.thrift
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/interface/cassandra.thrift?rev=1078134&r1=1078133&r2=1078134&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/interface/cassandra.thrift (original)
+++ cassandra/branches/cassandra-0.7/interface/cassandra.thrift Fri Mar 4 21:08:21 2011
@@ -130,29 +130,47 @@ exception AuthorizationException {
#
# service api
#
-/** The ConsistencyLevel is an enum that controls both read and write behavior based on <ReplicationFactor> in your
- * storage-conf.xml. The different consistency levels have different meanings, depending on if you're doing a write or read
- * operation. Note that if W + R > ReplicationFactor, where W is the number of nodes to block for on write, and R
- * the number to block for on reads, you will have strongly consistent behavior; that is, readers will always see the most
- * recent write. Of these, the most interesting is to do QUORUM reads and writes, which gives you consistency while still
- * allowing availability in the face of node failures up to half of <ReplicationFactor>. Of course if latency is more
- * important than consistency then you can use lower values for either or both.
+/**
+ * The ConsistencyLevel is an enum that controls both read and write
+ * behavior based on the ReplicationFactor of the keyspace. The
+ * different consistency levels have different meanings, depending on
+ * if you're doing a write or read operation.
*
+ * If W + R > ReplicationFactor, where W is the number of nodes to
+ * block for on write, and R the number to block for on reads, you
+ * will have strongly consistent behavior; that is, readers will
+ * always see the most recent write. Of these, the most interesting is
+ * to do QUORUM reads and writes, which gives you consistency while
+ * still allowing availability in the face of node failures up to half
+ * of <ReplicationFactor>. Of course if latency is more important than
+ * consistency then you can use lower values for either or both.
+ *
+ * Some ConsistencyLevels (ONE, TWO, THREE) refer to a specific number
+ * of replicas rather than a logical concept that adjusts
+ * automatically with the replication factor. Of these, only ONE is
+ * commonly used; TWO and (even more rarely) THREE are only useful
+ * when you care more about guaranteeing a certain level of
+ * durability than consistency.
+ *
* Write consistency levels make the following guarantees before reporting success to the client:
* ANY Ensure that the write has been written once somewhere, including possibly being hinted in a non-target node.
* ONE Ensure that the write has been written to at least 1 node's commit log and memory table
+ * TWO Ensure that the write has been written to the commit log and memory table of at least 2 nodes
+ * THREE Ensure that the write has been written to the commit log and memory table of at least 3 nodes
* QUORUM Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes
* LOCAL_QUORUM Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes, within the local datacenter (requires NetworkTopologyStrategy)
* EACH_QUORUM Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes in each datacenter (requires NetworkTopologyStrategy)
* ALL Ensure that the write is written to <code><ReplicationFactor></code> nodes before responding to the client.
- *
- * Read:
+ *
+ * Read consistency levels make the following guarantees before returning successful results to the client:
* ANY Not supported. You probably want ONE instead.
- * ONE Will return the record returned by the first node to respond. A consistency check is always done in a background thread to fix any consistency issues when ConsistencyLevel.ONE is used. This means subsequent calls will have correct data even if the initial read gets an older value. (This is called 'read repair'.)
- * QUORUM Will query all storage nodes and return the record with the most recent timestamp once it has at least a majority of replicas reported. Again, the remaining replicas will be checked in the background.
+ * ONE Returns the record obtained from a single replica.
+ * TWO Returns the record with the most recent timestamp once two replicas have replied.
+ * THREE Returns the record with the most recent timestamp once three replicas have replied.
+ * QUORUM Returns the record with the most recent timestamp once a majority of replicas have replied.
* LOCAL_QUORUM Returns the record with the most recent timestamp once a majority of replicas within the local datacenter have replied.
* EACH_QUORUM Returns the record with the most recent timestamp once a majority of replicas within each datacenter have replied.
- * ALL Queries all storage nodes and returns the record with the most recent timestamp.
+ * ALL Returns the record with the most recent timestamp once all replicas have replied (implies that no replica may be down).
*/
enum ConsistencyLevel {
ONE = 1,
@@ -161,6 +179,8 @@ enum ConsistencyLevel {
EACH_QUORUM = 4,
ALL = 5,
ANY = 6,
+ TWO = 7,
+ THREE = 8,
}
/**
Modified: cassandra/branches/cassandra-0.7/interface/thrift/gen-java/org/apache/cassandra/thrift/ConsistencyLevel.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/interface/thrift/gen-java/org/apache/cassandra/thrift/ConsistencyLevel.java?rev=1078134&r1=1078133&r2=1078134&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/interface/thrift/gen-java/org/apache/cassandra/thrift/ConsistencyLevel.java (original)
+++ cassandra/branches/cassandra-0.7/interface/thrift/gen-java/org/apache/cassandra/thrift/ConsistencyLevel.java Fri Mar 4 21:08:21 2011
@@ -32,8 +32,8 @@ import java.util.HashMap;
import org.apache.thrift.TEnum;
/**
- * The ConsistencyLevel is an enum that controls both read and write behavior based on <ReplicationFactor> in your
- * storage-conf.xml. The different consistency levels have different meanings, depending on if you're doing a write or read
+ * The ConsistencyLevel is an enum that controls both read and write behavior based on the ReplicationFactor of the keyspace.
+ * The different consistency levels have different meanings, depending on if you're doing a write or read
* operation. Note that if W + R > ReplicationFactor, where W is the number of nodes to block for on write, and R
* the number to block for on reads, you will have strongly consistent behavior; that is, readers will always see the most
* recent write. Of these, the most interesting is to do QUORUM reads and writes, which gives you consistency while still
@@ -43,18 +43,22 @@ import org.apache.thrift.TEnum;
* Write consistency levels make the following guarantees before reporting success to the client:
* ANY Ensure that the write has been written once somewhere, including possibly being hinted in a non-target node.
* ONE Ensure that the write has been written to at least 1 node's commit log and memory table
+ * TWO Ensure that the write has been written to the commit log and memory table of at least 2 nodes
+ * THREE Ensure that the write has been written to the commit log and memory table of at least 3 nodes
* QUORUM Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes
* LOCAL_QUORUM Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes, within the local datacenter (requires NetworkTopologyStrategy)
* EACH_QUORUM Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes in each datacenter (requires NetworkTopologyStrategy)
* ALL Ensure that the write is written to <code><ReplicationFactor></code> nodes before responding to the client.
*
- * Read:
+ * Read consistency levels make the following guarantees before returning successful results to the client:
* ANY Not supported. You probably want ONE instead.
- * ONE Will return the record returned by the first node to respond. A consistency check is always done in a background thread to fix any consistency issues when ConsistencyLevel.ONE is used. This means subsequent calls will have correct data even if the initial read gets an older value. (This is called 'read repair'.)
- * QUORUM Will query all storage nodes and return the record with the most recent timestamp once it has at least a majority of replicas reported. Again, the remaining replicas will be checked in the background.
+ * ONE Returns the record obtained from a single replica.
+ * TWO Returns the record with the most recent timestamp once two replicas have replied.
+ * THREE Returns the record with the most recent timestamp once three replicas have replied.
+ * QUORUM Returns the record with the most recent timestamp once a majority of replicas have replied.
* LOCAL_QUORUM Returns the record with the most recent timestamp once a majority of replicas within the local datacenter have replied.
* EACH_QUORUM Returns the record with the most recent timestamp once a majority of replicas within each datacenter have replied.
- * ALL Queries all storage nodes and returns the record with the most recent timestamp.
+ * ALL Returns the record with the most recent timestamp once all replicas have replied (implies that no replica may be down).
*/
public enum ConsistencyLevel implements TEnum {
ONE(1),
@@ -62,7 +66,9 @@ public enum ConsistencyLevel implements
LOCAL_QUORUM(3),
EACH_QUORUM(4),
ALL(5),
- ANY(6);
+ ANY(6),
+ TWO(7),
+ THREE(8);
private final int value;
@@ -95,6 +101,10 @@ public enum ConsistencyLevel implements
return ALL;
case 6:
return ANY;
+ case 7:
+ return TWO;
+ case 8:
+ return THREE;
default:
return null;
}
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/ReadCallback.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/ReadCallback.java?rev=1078134&r1=1078133&r2=1078134&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/ReadCallback.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/ReadCallback.java Fri Mar 4 21:08:21 2011
@@ -167,6 +167,10 @@ public class ReadCallback<T> implements
case ONE:
case ANY:
return 1;
+ case TWO:
+ return 2;
+ case THREE:
+ return 3;
case QUORUM:
return (Table.open(table).getReplicationStrategy().getReplicationFactor() / 2) + 1;
case ALL:
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/WriteResponseHandler.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/WriteResponseHandler.java?rev=1078134&r1=1078133&r2=1078134&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/WriteResponseHandler.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/WriteResponseHandler.java Fri Mar 4 21:08:21 2011
@@ -34,7 +34,7 @@ import org.apache.cassandra.thrift.Consi
import org.apache.cassandra.thrift.UnavailableException;
/**
- * Handles blocking writes for ONE, ANY, QUORUM, and ALL consistency levels.
+ * Handles blocking writes for ONE, ANY, TWO, THREE, QUORUM, and ALL consistency levels.
*/
public class WriteResponseHandler extends AbstractWriteResponseHandler
{
@@ -83,6 +83,12 @@ public class WriteResponseHandler extend
case ANY:
blockFor = 1;
break;
+ case TWO:
+ blockFor = 2;
+ break;
+ case THREE:
+ blockFor = 3;
+ break;
case QUORUM:
blockFor = (writeEndpoints.size() / 2) + 1;
break;
Modified: cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/service/ConsistencyLevelTest.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/service/ConsistencyLevelTest.java?rev=1078134&r1=1078133&r2=1078134&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/service/ConsistencyLevelTest.java (original)
+++ cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/service/ConsistencyLevelTest.java Fri Mar 4 21:08:21 2011
@@ -145,6 +145,18 @@ public class ConsistencyLevelTest extend
else
assertTrue(hintedNodes.size() > 0);
break;
+ case TWO:
+ if (isWriteUnavailable)
+ assertTrue(hintedNodes.size() < 2);
+ else
+ assertTrue(hintedNodes.size() >= 2);
+ break;
+ case THREE:
+ if (isWriteUnavailable)
+ assertTrue(hintedNodes.size() < 3);
+ else
+ assertTrue(hintedNodes.size() >= 3);
+ break;
case QUORUM:
if (isWriteUnavailable)
assertTrue(hintedNodes.size() < (replicationFactor / 2 + 1));