You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2015/12/23 12:08:33 UTC

[3/3] cassandra git commit: Skip full cells when we trivially can

Skip full cells when we trivially can


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7dc4ae73
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7dc4ae73
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7dc4ae73

Branch: refs/heads/10657
Commit: 7dc4ae73f780c62493d5627601a1e9fb889230b6
Parents: 9666e67
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Wed Dec 23 12:07:11 2015 +0100
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Wed Dec 23 12:08:06 2015 +0100

----------------------------------------------------------------------
 .../cassandra/db/filter/ColumnFilter.java       | 10 ++++++++
 .../org/apache/cassandra/db/rows/BTreeRow.java  | 19 +++++++++++----
 .../cassandra/db/rows/ComplexColumnData.java    | 16 +++++++------
 .../cassandra/db/rows/SerializationHelper.java  | 25 ++++++++++++++++++++
 .../cassandra/db/rows/UnfilteredSerializer.java |  4 ++--
 5 files changed, 60 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/7dc4ae73/src/java/org/apache/cassandra/db/filter/ColumnFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/filter/ColumnFilter.java b/src/java/org/apache/cassandra/db/filter/ColumnFilter.java
index 60cfad5..53be6e8 100644
--- a/src/java/org/apache/cassandra/db/filter/ColumnFilter.java
+++ b/src/java/org/apache/cassandra/db/filter/ColumnFilter.java
@@ -105,6 +105,16 @@ public class ColumnFilter
         return isFetchAll ? metadata.partitionColumns() : selection;
     }
 
+    /**
+     * The columns actually queried by the user.
+     * <p>
+     * Note that this is in general not all the columns that are fetched internally (see {@link #fetchedColumns}).
+     */
+    public PartitionColumns queriedByUser()
+    {
+        return selection;
+    }
+
     public boolean includesAllColumns()
     {
         return isFetchAll;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7dc4ae73/src/java/org/apache/cassandra/db/rows/BTreeRow.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/BTreeRow.java b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
index a91ea9d..a25a69e 100644
--- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java
+++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
@@ -237,10 +237,12 @@ public class BTreeRow extends AbstractRow
     {
         Map<ByteBuffer, CFMetaData.DroppedColumn> droppedColumns = metadata.getDroppedColumns();
 
-        if (filter.includesAllColumns() && (activeDeletion.isLive() || deletion.supersedes(activeDeletion)) && droppedColumns.isEmpty())
+        boolean mayFilterColumns = !filter.includesAllColumns() || filter.skipSomeValues();
+        boolean mayHaveShadowed = activeDeletion.supersedes(deletion.time());
+
+        if (!mayFilterColumns && !mayHaveShadowed && droppedColumns.isEmpty())
             return this;
 
-        boolean mayHaveShadowed = activeDeletion.supersedes(deletion.time());
 
         LivenessInfo newInfo = primaryKeyLivenessInfo;
         Deletion newDeletion = deletion;
@@ -255,6 +257,8 @@ public class BTreeRow extends AbstractRow
 
         Columns columns = filter.fetchedColumns().columns(isStatic());
         Predicate<ColumnDefinition> inclusionTester = columns.inOrderInclusionTester();
+        Predicate<ColumnDefinition> queriedByUserTester = filter.queriedByUser().columns(isStatic()).inOrderInclusionTester();
+        final LivenessInfo rowLiveness = newInfo;
         return transformAndFilter(newInfo, newDeletion, (cd) -> {
 
             ColumnDefinition column = cd.column();
@@ -263,11 +267,16 @@ public class BTreeRow extends AbstractRow
 
             CFMetaData.DroppedColumn dropped = droppedColumns.get(column.name.bytes);
             if (column.isComplex())
-                return ((ComplexColumnData) cd).filter(filter, mayHaveShadowed ? activeDeletion : DeletionTime.LIVE, dropped);
+                return ((ComplexColumnData) cd).filter(filter, mayHaveShadowed ? activeDeletion : DeletionTime.LIVE, dropped, rowLiveness);
 
             Cell cell = (Cell) cd;
-            return (dropped == null || cell.timestamp() > dropped.droppedTime) && !(mayHaveShadowed && activeDeletion.deletes(cell))
-                   ? cell : null;
+            // We include the cell unless it is 1) shadowed, 2) for a dropped column or 3) skippable.
+            // And a cell is skippable if it is for a column that is not selected by the user (canSkipValue) and its timestamp
+            // is lower than the row timestamp (see #10657 or SerializationHelper.includes() for details).
+            boolean isForDropped = dropped != null && cell.timestamp() <= dropped.droppedTime;
+            boolean isShadowed = mayHaveShadowed && activeDeletion.deletes(cell);
+            boolean isSkippable = !queriedByUserTester.test(column) && cell.timestamp() < rowLiveness.timestamp();
+            return isForDropped || isShadowed || isSkippable ? null : cell;
         });
     }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7dc4ae73/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java
index bf2b39c..facf6a9 100644
--- a/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java
+++ b/src/java/org/apache/cassandra/db/rows/ComplexColumnData.java
@@ -29,6 +29,7 @@ import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.config.ColumnDefinition;
 import org.apache.cassandra.db.DeletionTime;
 import org.apache.cassandra.db.DeletionPurger;
+import org.apache.cassandra.db.LivenessInfo;
 import org.apache.cassandra.db.filter.ColumnFilter;
 import org.apache.cassandra.db.marshal.ByteType;
 import org.apache.cassandra.db.marshal.SetType;
@@ -144,19 +145,20 @@ public class ComplexColumnData extends ColumnData implements Iterable<Cell>
         return transformAndFilter(complexDeletion, Cell::markCounterLocalToBeCleared);
     }
 
-    public ComplexColumnData filter(ColumnFilter filter, DeletionTime activeDeletion, CFMetaData.DroppedColumn dropped)
+    public ComplexColumnData filter(ColumnFilter filter, DeletionTime activeDeletion, CFMetaData.DroppedColumn dropped, LivenessInfo rowLiveness)
     {
         ColumnFilter.Tester cellTester = filter.newTester(column);
         if (cellTester == null && activeDeletion.isLive() && dropped == null)
             return this;
 
         DeletionTime newDeletion = activeDeletion.supersedes(complexDeletion) ? DeletionTime.LIVE : complexDeletion;
-        return transformAndFilter(newDeletion,
-                                  (cell) ->
-                                           (cellTester == null || cellTester.includes(cell.path()))
-                                        && !activeDeletion.deletes(cell)
-                                        && (dropped == null || cell.timestamp() > dropped.droppedTime)
-                                           ? cell : null);
+        return transformAndFilter(newDeletion, (cell) ->
+        {
+            boolean isForDropped = dropped != null && cell.timestamp() <= dropped.droppedTime;
+            boolean isShadowed = activeDeletion.deletes(cell);
+            boolean isSkippable = cellTester != null && (!cellTester.includes(cell.path()) || (cellTester.canSkipValue(cell.path()) && cell.timestamp() < rowLiveness.timestamp()));
+            return isForDropped || isShadowed || isSkippable ? null : cell;
+        });
     }
 
     public ComplexColumnData purge(DeletionPurger purger, int nowInSec)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7dc4ae73/src/java/org/apache/cassandra/db/rows/SerializationHelper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/SerializationHelper.java b/src/java/org/apache/cassandra/db/rows/SerializationHelper.java
index 6b4bc2e..534ee93 100644
--- a/src/java/org/apache/cassandra/db/rows/SerializationHelper.java
+++ b/src/java/org/apache/cassandra/db/rows/SerializationHelper.java
@@ -82,6 +82,31 @@ public class SerializationHelper
         return columnsToFetch == null || columnsToFetch.includes(column);
     }
 
+    public boolean includes(Cell cell, LivenessInfo rowLiveness)
+    {
+        if (columnsToFetch == null)
+            return true;
+
+        // During queries, some columns are included even though they are not queried by the user because
+        // we always need to distinguish between having a row (with potentially only null values) and not
+        // having a row at all (see #CASSANDRA-7085 for background). In the case where the column is not
+        // actually requested by the user however (canSkipValue), we can skip the full cell if the cell
+        // timestamp is lower than the row one, because in that case, the row timestamp is enough proof
+        // of the liveness of the row. Otherwise, we'll only be able to skip the values of those cells.
+        ColumnDefinition column = cell.column();
+        if (column.isComplex())
+        {
+            if (!includes(cell.path()))
+                return false;
+
+            return !canSkipValue(cell.path()) || cell.timestamp() > rowLiveness.timestamp();
+        }
+        else
+        {
+            return !columnsToFetch.canSkipValue(column) || cell.timestamp() > rowLiveness.timestamp();
+        }
+    }
+
     public boolean includes(CellPath path)
     {
         return path == null || tester == null || tester.includes(path);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7dc4ae73/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
index 4efc5eb..0511f8f 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
@@ -451,7 +451,7 @@ public class UnfilteredSerializer
         if (helper.includes(column))
         {
             Cell cell = Cell.serializer.deserialize(in, rowLiveness, column, header, helper);
-            if (!helper.isDropped(cell, false))
+            if (helper.includes(cell, rowLiveness) && !helper.isDropped(cell, false))
                 builder.addCell(cell);
         }
         else
@@ -477,7 +477,7 @@ public class UnfilteredSerializer
             while (--count >= 0)
             {
                 Cell cell = Cell.serializer.deserialize(in, rowLiveness, column, header, helper);
-                if (helper.includes(cell.path()) && !helper.isDropped(cell, true))
+                if (helper.includes(cell, rowLiveness) && !helper.isDropped(cell, true))
                     builder.addCell(cell);
             }