You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2011/09/09 17:46:14 UTC
svn commit: r1167254 - in
/cassandra/trunk/src/java/org/apache/cassandra/service:
RowDigestResolver.java RowRepairResolver.java StorageProxy.java
Author: jbellis
Date: Fri Sep 9 15:46:13 2011
New Revision: 1167254
URL: http://svn.apache.org/viewvc?rev=1167254&view=rev
Log:
cleanup read path (StorageProxy side)
patch by jbellis; reviewed by slebresne for CASSANDRA-3161
Modified:
cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java
cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java
cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java
Modified: cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java?rev=1167254&r1=1167253&r2=1167254&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/service/RowDigestResolver.java Fri Sep 9 15:46:13 2011
@@ -33,7 +33,10 @@ public class RowDigestResolver extends A
{
super(key, table);
}
-
+
+ /**
+ * Special case of resolve() so that CL.ONE reads never throw DigestMismatchException in the foreground
+ */
public Row getData() throws IOException
{
for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
@@ -62,14 +65,10 @@ public class RowDigestResolver extends A
logger.debug("resolving " + replies.size() + " responses");
long startTime = System.currentTimeMillis();
- ColumnFamily data = null;
// validate digests against each other; throw immediately on mismatch.
- // also, collects data results into versions/endpoints lists.
- //
- // results are cleared as we process them, to avoid unnecessary duplication of work
- // when resolve() is called a second time for read repair on responses that were not
- // necessary to satisfy ConsistencyLevel.
+ // also extract the data reply, if any.
+ ColumnFamily data = null;
ByteBuffer digest = null;
for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
{
Modified: cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java?rev=1167254&r1=1167253&r2=1167254&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/service/RowRepairResolver.java Fri Sep 9 15:46:13 2011
@@ -27,6 +27,9 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
+import com.google.common.base.Function;
+import com.google.common.collect.Iterables;
+
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.columniterator.IdentityQueryFilter;
import org.apache.cassandra.db.filter.QueryFilter;
@@ -59,45 +62,42 @@ public class RowRepairResolver extends A
{
if (logger.isDebugEnabled())
logger.debug("resolving " + replies.size() + " responses");
-
long startTime = System.currentTimeMillis();
- List<ColumnFamily> versions = new ArrayList<ColumnFamily>();
- List<InetAddress> endpoints = new ArrayList<InetAddress>();
-
- // case 1: validate digests against each other; throw immediately on mismatch.
- // also, collects data results into versions/endpoints lists.
- //
- // results are cleared as we process them, to avoid unnecessary duplication of work
- // when resolve() is called a second time for read repair on responses that were not
- // necessary to satisfy ConsistencyLevel.
- for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
- {
- Message message = entry.getKey();
- ReadResponse response = entry.getValue();
- assert !response.isDigestQuery();
- versions.add(response.row().cf);
- endpoints.add(message.getFrom());
- }
ColumnFamily resolved;
- if (versions.size() > 1)
+ if (replies.size() > 1)
{
- for (ColumnFamily cf : versions)
+ // compute maxLiveColumns to prevent short reads -- see https://issues.apache.org/jira/browse/CASSANDRA-2643
+ for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
{
+ ReadResponse response = entry.getValue();
+ assert !response.isDigestQuery() : "Received digest response to repair read from " + entry.getKey().getFrom();
+
+ ColumnFamily cf = response.row().cf;
int liveColumns = cf.getLiveColumnCount();
if (liveColumns > maxLiveColumns)
maxLiveColumns = liveColumns;
}
- resolved = resolveSuperset(versions);
+
+ // merge the row versions
+ resolved = resolveSuperset(Iterables.transform(replies.values(), new Function<ReadResponse, ColumnFamily>()
+ {
+ public ColumnFamily apply(ReadResponse response)
+ {
+ return response.row().cf;
+ }
+ }));
if (logger.isDebugEnabled())
logger.debug("versions merged");
- // resolved can be null even if versions doesn't have all nulls because of the call to removeDeleted in resolveSuperSet
+
+ // send updates to any replica that was missing part of the full row
+ // (resolved can be null even if versions doesn't have all nulls because of the call to removeDeleted in resolveSuperSet)
if (resolved != null)
- repairResults = scheduleRepairs(resolved, table, key, versions, endpoints);
+ repairResults = scheduleRepairs(resolved, table, key, replies);
}
else
{
- resolved = versions.get(0);
+ resolved = replies.values().iterator().next().row().cf;
}
if (logger.isDebugEnabled())
@@ -110,13 +110,15 @@ public class RowRepairResolver extends A
* For each row version, compare with resolved (the superset of all row versions);
* if it is missing anything, send a mutation to the endpoint it come from.
*/
- public static List<IAsyncResult> scheduleRepairs(ColumnFamily resolved, String table, DecoratedKey<?> key, List<ColumnFamily> versions, List<InetAddress> endpoints)
+ public static List<IAsyncResult> scheduleRepairs(ColumnFamily resolved, String table, DecoratedKey<?> key, Map<Message,ReadResponse> replies)
{
- List<IAsyncResult> results = new ArrayList<IAsyncResult>(versions.size());
+ List<IAsyncResult> results = new ArrayList<IAsyncResult>(replies.size());
- for (int i = 0; i < versions.size(); i++)
+ for (Map.Entry<Message, ReadResponse> entry : replies.entrySet())
{
- ColumnFamily diffCf = ColumnFamily.diff(versions.get(i), resolved);
+ InetAddress from = entry.getKey().getFrom();
+ ColumnFamily cf = entry.getValue().row().cf;
+ ColumnFamily diffCf = ColumnFamily.diff(cf, resolved);
if (diffCf == null) // no repair needs to happen
continue;
@@ -126,21 +128,21 @@ public class RowRepairResolver extends A
Message repairMessage;
try
{
- repairMessage = rowMutation.getMessage(Gossiper.instance.getVersion(endpoints.get(i)));
+ repairMessage = rowMutation.getMessage(Gossiper.instance.getVersion(from));
}
catch (IOException e)
{
throw new IOError(e);
}
- results.add(MessagingService.instance().sendRR(repairMessage, endpoints.get(i)));
+ results.add(MessagingService.instance().sendRR(repairMessage, from));
}
return results;
}
- static ColumnFamily resolveSuperset(List<ColumnFamily> versions)
+ static ColumnFamily resolveSuperset(Iterable<ColumnFamily> versions)
{
- assert versions.size() > 0;
+ assert Iterables.size(versions) > 0;
ColumnFamily resolved = null;
for (ColumnFamily cf : versions)
Modified: cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java?rev=1167254&r1=1167253&r2=1167254&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/service/StorageProxy.java Fri Sep 9 15:46:13 2011
@@ -573,24 +573,23 @@ public class StorageProxy implements Sto
* 4. If the digests (if any) match the data return the data
* 5. else carry out read repair by getting data from all the nodes.
*/
- private static List<Row> fetchRows(List<ReadCommand> commands, ConsistencyLevel consistency_level) throws IOException, UnavailableException, TimeoutException
+ private static List<Row> fetchRows(List<ReadCommand> initialCommands, ConsistencyLevel consistency_level) throws IOException, UnavailableException, TimeoutException
{
- List<ReadCallback<Row>> readCallbacks = new ArrayList<ReadCallback<Row>>();
- List<Row> rows = new ArrayList<Row>();
+ List<Row> rows = new ArrayList<Row>(initialCommands.size());
List<ReadCommand> commandsToRetry = Collections.emptyList();
- List<ReadCommand> repairCommands = Collections.emptyList();
do
{
- readCallbacks.clear();
- List<ReadCommand> commandsToSend = commandsToRetry.isEmpty() ? commands : commandsToRetry;
+ List<ReadCommand> commands = commandsToRetry.isEmpty() ? initialCommands : commandsToRetry;
+ ReadCallback<Row>[] readCallbacks = new ReadCallback[commands.size()];
if (!commandsToRetry.isEmpty())
logger.debug("Retrying {} commands", commandsToRetry.size());
// send out read requests
- for (ReadCommand command : commandsToSend)
+ for (int i = 0; i < commands.size(); i++)
{
+ ReadCommand command = commands.get(i);
assert !command.isDigestQuery();
logger.debug("Command/ConsistencyLevel is {}/{}", command, consistency_level);
@@ -602,7 +601,7 @@ public class StorageProxy implements Sto
ReadCallback<Row> handler = getReadCallback(resolver, command, consistency_level, endpoints);
handler.assureSufficientLiveNodes();
assert !handler.endpoints.isEmpty();
- readCallbacks.add(handler);
+ readCallbacks[i] = handler;
// The data-request message is sent to dataPoint, the node that will actually get the data for us
InetAddress dataPoint = handler.endpoints.get(0);
@@ -643,15 +642,13 @@ public class StorageProxy implements Sto
}
}
- if (repairCommands != Collections.EMPTY_LIST)
- repairCommands.clear();
-
// read results and make a second pass for any digest mismatches
+ List<ReadCommand> repairCommands = null;
List<RepairCallback> repairResponseHandlers = null;
- for (int i = 0; i < commandsToSend.size(); i++)
+ for (int i = 0; i < commands.size(); i++)
{
- ReadCallback<Row> handler = readCallbacks.get(i);
- ReadCommand command = commandsToSend.get(i);
+ ReadCallback<Row> handler = readCallbacks[i];
+ ReadCommand command = commands.get(i);
try
{
long startTime2 = System.currentTimeMillis();
@@ -675,17 +672,17 @@ public class StorageProxy implements Sto
RowRepairResolver resolver = new RowRepairResolver(command.table, command.key);
RepairCallback repairHandler = new RepairCallback(resolver, handler.endpoints);
- if (repairCommands == Collections.EMPTY_LIST)
+ if (repairCommands == null)
+ {
repairCommands = new ArrayList<ReadCommand>();
+ repairResponseHandlers = new ArrayList<RepairCallback>();
+ }
repairCommands.add(command);
+ repairResponseHandlers.add(repairHandler);
MessageProducer producer = new CachingMessageProducer(command);
for (InetAddress endpoint : handler.endpoints)
MessagingService.instance().sendRR(producer, endpoint, repairHandler);
-
- if (repairResponseHandlers == null)
- repairResponseHandlers = new ArrayList<RepairCallback>();
- repairResponseHandlers.add(repairHandler);
}
}