You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by be...@apache.org on 2015/09/04 14:47:44 UTC

[1/3] cassandra git commit: Improve efficiency of Row

Repository: cassandra
Updated Branches:
  refs/heads/cassandra-3.0 ce63ccc84 -> aa5762633
  refs/heads/trunk 356dd5b31 -> ae89731f0


Improve efficiency of Row

Removes need to store a Columns instance within a Row,
avoiding both the cost of merging and storing these.

Improves algorithmic efficiency of Rows.merge by performing
a straightforward linear merge on the present ColumnData.

Improves algorithmic efficiency of Rows.diff by utilising MergeIterator

patch by benedict; reviewed by marcus for CASSANDRA-10193


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/aa576263
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/aa576263
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/aa576263

Branch: refs/heads/cassandra-3.0
Commit: aa5762633a06e229cd9122983eec94bdb74fc23a
Parents: ce63ccc
Author: Benedict Elliott Smith <be...@apache.org>
Authored: Wed Aug 26 19:02:03 2015 +0100
Committer: Benedict Elliott Smith <be...@apache.org>
Committed: Fri Sep 4 13:47:07 2015 +0100

----------------------------------------------------------------------
 .../apache/cassandra/cql3/UpdateParameters.java |   4 +-
 src/java/org/apache/cassandra/db/Columns.java   |   2 +-
 .../org/apache/cassandra/db/LegacyLayout.java   |   4 +-
 .../apache/cassandra/db/RowUpdateBuilder.java   |   6 +-
 .../cassandra/db/SerializationHeader.java       |   1 -
 .../cassandra/db/UnfilteredDeserializer.java    |   2 +-
 .../db/compaction/CompactionIterator.java       |   4 +-
 .../db/partitions/AtomicBTreePartition.java     |   8 +-
 .../db/partitions/PartitionUpdate.java          |  10 +-
 .../apache/cassandra/db/rows/AbstractRow.java   |   1 -
 .../org/apache/cassandra/db/rows/BTreeRow.java  |  64 +++---
 src/java/org/apache/cassandra/db/rows/Row.java  |  15 +-
 src/java/org/apache/cassandra/db/rows/Rows.java | 214 ++++++++++---------
 .../rows/UnfilteredRowIteratorSerializer.java   |   2 +-
 .../db/rows/UnfilteredRowIterators.java         |  21 +-
 .../cassandra/db/rows/UnfilteredSerializer.java |   2 +-
 .../cassandra/db/view/MaterializedView.java     |   6 +-
 .../cassandra/index/SecondaryIndexManager.java  |  12 +-
 .../transactions/CompactionTransaction.java     |   5 +-
 .../io/sstable/SSTableSimpleIterator.java       |   2 +-
 .../apache/cassandra/service/DataResolver.java  |   6 +-
 .../cassandra/thrift/ThriftResultsMerger.java   |   6 +-
 .../utils/memory/AbstractAllocator.java         |   8 +-
 .../utils/memory/MemtableAllocator.java         |   2 +-
 .../utils/memory/MemtableBufferAllocator.java   |   5 +-
 .../cassandra/utils/memory/NativeAllocator.java |   2 +-
 test/unit/org/apache/cassandra/db/RowTest.java  |   2 +-
 .../cassandra/triggers/TriggerExecutorTest.java |   2 +-
 28 files changed, 210 insertions(+), 208 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/cql3/UpdateParameters.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/UpdateParameters.java b/src/java/org/apache/cassandra/cql3/UpdateParameters.java
index 22aeddd..cd81f84 100644
--- a/src/java/org/apache/cassandra/cql3/UpdateParameters.java
+++ b/src/java/org/apache/cassandra/cql3/UpdateParameters.java
@@ -121,13 +121,13 @@ public class UpdateParameters
         if (clustering == Clustering.STATIC_CLUSTERING)
         {
             if (staticBuilder == null)
-                staticBuilder = BTreeRow.unsortedBuilder(updatedColumns.statics, nowInSec);
+                staticBuilder = BTreeRow.unsortedBuilder(nowInSec);
             builder = staticBuilder;
         }
         else
         {
             if (regularBuilder == null)
-                regularBuilder = BTreeRow.unsortedBuilder(updatedColumns.regulars, nowInSec);
+                regularBuilder = BTreeRow.unsortedBuilder(nowInSec);
             builder = regularBuilder;
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/Columns.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/Columns.java b/src/java/org/apache/cassandra/db/Columns.java
index 46e8401..582b742 100644
--- a/src/java/org/apache/cassandra/db/Columns.java
+++ b/src/java/org/apache/cassandra/db/Columns.java
@@ -86,7 +86,7 @@ public class Columns extends AbstractCollection<ColumnDefinition> implements Col
      * @param s the set from which to create the new {@code Columns}.
      * @return the newly created {@code Columns} containing the columns from {@code s}.
      */
-    public static Columns from(Set<ColumnDefinition> s)
+    public static Columns from(Collection<ColumnDefinition> s)
     {
         Object[] tree = BTree.<ColumnDefinition>builder(Comparator.naturalOrder()).addAll(s).build();
         return new Columns(tree, findFirstComplexIdx(tree));

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/LegacyLayout.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/LegacyLayout.java b/src/java/org/apache/cassandra/db/LegacyLayout.java
index 7b03e46..b6f6657 100644
--- a/src/java/org/apache/cassandra/db/LegacyLayout.java
+++ b/src/java/org/apache/cassandra/db/LegacyLayout.java
@@ -539,7 +539,7 @@ public abstract class LegacyLayout
         for (ColumnDefinition column : statics)
             columnsToFetch.add(column.name.bytes);
 
-        Row.Builder builder = BTreeRow.unsortedBuilder(statics, FBUtilities.nowInSeconds());
+        Row.Builder builder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         builder.newRow(Clustering.STATIC_CLUSTERING);
 
         boolean foundOne = false;
@@ -1058,7 +1058,7 @@ public abstract class LegacyLayout
             // We cannot use a sorted builder because we don't have exactly the same ordering in 3.0 and pre-3.0. More precisely, within a row, we
             // store all simple columns before the complex ones in 3.0, which we use to sort everything sorted by the column name before. Note however
             // that the unsorted builder won't have to reconcile cells, so the exact value we pass for nowInSec doesn't matter.
-            this.builder = BTreeRow.unsortedBuilder(isStatic ? metadata.partitionColumns().statics : metadata.partitionColumns().regulars, FBUtilities.nowInSeconds());
+            this.builder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         }
 
         public static CellGrouper staticGrouper(CFMetaData metadata, SerializationHelper helper)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/RowUpdateBuilder.java b/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
index 65f08b4..8e6ce3e 100644
--- a/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
+++ b/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
@@ -82,7 +82,7 @@ public class RowUpdateBuilder
         assert staticBuilder == null : "Cannot update both static and non-static columns with the same RowUpdateBuilder object";
         assert regularBuilder == null : "Cannot add the clustering twice to the same row";
 
-        regularBuilder = BTreeRow.unsortedBuilder(update.columns().regulars, FBUtilities.nowInSeconds());
+        regularBuilder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         regularBuilder.newRow(clustering);
 
         // If a CQL table, add the "row marker"
@@ -107,7 +107,7 @@ public class RowUpdateBuilder
         assert regularBuilder == null : "Cannot update both static and non-static columns with the same RowUpdateBuilder object";
         if (staticBuilder == null)
         {
-            staticBuilder = BTreeRow.unsortedBuilder(update.columns().statics, FBUtilities.nowInSeconds());
+            staticBuilder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
             staticBuilder.newRow(Clustering.STATIC_CLUSTERING);
         }
         return staticBuilder;
@@ -188,7 +188,7 @@ public class RowUpdateBuilder
         assert clusteringValues.length == update.metadata().comparator.size() || (clusteringValues.length == 0 && !update.columns().statics.isEmpty());
 
         boolean isStatic = clusteringValues.length != update.metadata().comparator.size();
-        Row.Builder builder = BTreeRow.sortedBuilder(isStatic ? update.columns().statics : update.columns().regulars);
+        Row.Builder builder = BTreeRow.sortedBuilder();
 
         if (isStatic)
             builder.newRow(Clustering.STATIC_CLUSTERING);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/SerializationHeader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/SerializationHeader.java b/src/java/org/apache/cassandra/db/SerializationHeader.java
index a840f02..582a6af 100644
--- a/src/java/org/apache/cassandra/db/SerializationHeader.java
+++ b/src/java/org/apache/cassandra/db/SerializationHeader.java
@@ -23,7 +23,6 @@ import java.util.*;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
-import com.google.common.base.Function;
 
 import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.config.ColumnDefinition;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
index 2f75f34..d47da3c 100644
--- a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
+++ b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
@@ -121,7 +121,7 @@ public abstract class UnfilteredDeserializer
             super(metadata, in, helper);
             this.header = header;
             this.clusteringDeserializer = new ClusteringPrefix.Deserializer(metadata.comparator, in, header);
-            this.builder = BTreeRow.sortedBuilder(helper.fetchedRegularColumns(header));
+            this.builder = BTreeRow.sortedBuilder();
         }
 
         public boolean hasNext() throws IOException

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
index 4aaa17a..fe18c04 100644
--- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
+++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
@@ -187,10 +187,10 @@ public class CompactionIterator extends CompactionInfo.Holder implements Unfilte
                     {
                     }
 
-                    public void onMergedRows(Row merged, Columns columns, Row[] versions)
+                    public void onMergedRows(Row merged, Row[] versions)
                     {
                         indexTransaction.start();
-                        indexTransaction.onRowMerge(columns, merged, versions);
+                        indexTransaction.onRowMerge(merged, versions);
                         indexTransaction.commit();
                     }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
index d279a6b..e00a75e 100644
--- a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
+++ b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
@@ -263,10 +263,10 @@ public class AtomicBTreePartition extends AbstractBTreePartition
             boolean isStatic = clustering == Clustering.STATIC_CLUSTERING;
             // We know we only insert/update one static per PartitionUpdate, so no point in saving the builder
             if (isStatic)
-                return allocator.rowBuilder(updating.metadata(), writeOp, true);
+                return allocator.rowBuilder(writeOp);
 
             if (regularBuilder == null)
-                regularBuilder = allocator.rowBuilder(updating.metadata(), writeOp, false);
+                regularBuilder = allocator.rowBuilder(writeOp);
             return regularBuilder;
         }
 
@@ -285,10 +285,8 @@ public class AtomicBTreePartition extends AbstractBTreePartition
 
         public Row apply(Row existing, Row update)
         {
-            Columns mergedColumns = existing.columns().mergeTo(update.columns());
-
             Row.Builder builder = builder(existing.clustering());
-            colUpdateTimeDelta = Math.min(colUpdateTimeDelta, Rows.merge(existing, update, mergedColumns, builder, nowInSec));
+            colUpdateTimeDelta = Math.min(colUpdateTimeDelta, Rows.merge(existing, update, builder, nowInSec));
 
             Row reconciled = builder.build();
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
index 5e056d2..1a27b39 100644
--- a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
+++ b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
@@ -168,12 +168,12 @@ public class PartitionUpdate extends AbstractBTreePartition
         if (row.isStatic())
         {
             Holder holder = new Holder(BTree.empty(), deletionInfo, row, EncodingStats.NO_STATS);
-            return new PartitionUpdate(metadata, key, new PartitionColumns(row.columns(), Columns.NONE), holder, deletionInfo, false);
+            return new PartitionUpdate(metadata, key, new PartitionColumns(Columns.from(row.columns()), Columns.NONE), holder, deletionInfo, false);
         }
         else
         {
             Holder holder = new Holder(BTree.singleton(row), deletionInfo, Rows.EMPTY_STATIC_ROW, EncodingStats.NO_STATS);
-            return new PartitionUpdate(metadata, key, new PartitionColumns(Columns.NONE, row.columns()), holder, deletionInfo, false);
+            return new PartitionUpdate(metadata, key, new PartitionColumns(Columns.NONE, Columns.from(row.columns())), holder, deletionInfo, false);
         }
     }
 
@@ -528,7 +528,8 @@ public class PartitionUpdate extends AbstractBTreePartition
 
         if (row.isStatic())
         {
-            // We test for == first because in most case it'll be true and that is faster
+            // this assert is expensive, and possibly of limited value; we should consider removing it
+            // or introducing a new class of assertions for test purposes
             assert columns().statics.containsAll(row.columns()) : columns().statics + " is not superset of " + row.columns();
             Row staticRow = holder.staticRow.isEmpty()
                       ? row
@@ -537,7 +538,8 @@ public class PartitionUpdate extends AbstractBTreePartition
         }
         else
         {
-            // We test for == first because in most case it'll be true and that is faster
+            // this assert is expensive, and possibly of limited value; we should consider removing it
+            // or introducing a new class of assertions for test purposes
             assert columns().regulars.containsAll(row.columns()) : columns().regulars + " is not superset of " + row.columns();
             rowBuilder.add(row);
         }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/AbstractRow.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/AbstractRow.java b/src/java/org/apache/cassandra/db/rows/AbstractRow.java
index fca765f..555146e 100644
--- a/src/java/org/apache/cassandra/db/rows/AbstractRow.java
+++ b/src/java/org/apache/cassandra/db/rows/AbstractRow.java
@@ -154,7 +154,6 @@ public abstract class AbstractRow extends AbstractCollection<ColumnData> impleme
 
         Row that = (Row)other;
         if (!this.clustering().equals(that.clustering())
-             || !this.columns().equals(that.columns())
              || !this.primaryKeyLivenessInfo().equals(that.primaryKeyLivenessInfo())
              || !this.deletion().equals(that.deletion()))
             return false;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/BTreeRow.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/BTreeRow.java b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
index ed036af..653ffcd 100644
--- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java
+++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
@@ -23,6 +23,7 @@ import java.util.function.Predicate;
 
 import com.google.common.base.Function;
 import com.google.common.collect.Collections2;
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 
 import org.apache.cassandra.config.CFMetaData;
@@ -36,7 +37,6 @@ import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.ObjectSizes;
 import org.apache.cassandra.utils.btree.BTree;
 import org.apache.cassandra.utils.btree.BTreeSearchIterator;
-import org.apache.cassandra.utils.btree.BTreeSet;
 import org.apache.cassandra.utils.btree.UpdateFunction;
 
 /**
@@ -47,7 +47,6 @@ public class BTreeRow extends AbstractRow
     private static final long EMPTY_SIZE = ObjectSizes.measure(emptyRow(Clustering.EMPTY));
 
     private final Clustering clustering;
-    private final Columns columns;
     private final LivenessInfo primaryKeyLivenessInfo;
     private final DeletionTime deletion;
 
@@ -63,10 +62,9 @@ public class BTreeRow extends AbstractRow
     // no expiring cells, this will be Integer.MAX_VALUE;
     private final int minLocalDeletionTime;
 
-    private BTreeRow(Clustering clustering, Columns columns, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree, int minLocalDeletionTime)
+    private BTreeRow(Clustering clustering, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree, int minLocalDeletionTime)
     {
         this.clustering = clustering;
-        this.columns = columns;
         this.primaryKeyLivenessInfo = primaryKeyLivenessInfo;
         this.deletion = deletion;
         this.btree = btree;
@@ -74,7 +72,7 @@ public class BTreeRow extends AbstractRow
     }
 
     // Note that it's often easier/safer to use the sortedBuilder/unsortedBuilder or one of the static creation method below. Only directly useful in a small amount of cases.
-    public static BTreeRow create(Clustering clustering, Columns columns, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree)
+    public static BTreeRow create(Clustering clustering, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree)
     {
         int minDeletionTime = Math.min(minDeletionTime(primaryKeyLivenessInfo), minDeletionTime(deletion));
         if (minDeletionTime != Integer.MIN_VALUE)
@@ -83,33 +81,33 @@ public class BTreeRow extends AbstractRow
                 minDeletionTime = Math.min(minDeletionTime, minDeletionTime(cd));
         }
 
-        return new BTreeRow(clustering, columns, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
+        return new BTreeRow(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
     }
 
     public static BTreeRow emptyRow(Clustering clustering)
     {
-        return new BTreeRow(clustering, Columns.NONE, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.empty(), Integer.MAX_VALUE);
+        return new BTreeRow(clustering, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.empty(), Integer.MAX_VALUE);
     }
 
     public static BTreeRow singleCellRow(Clustering clustering, Cell cell)
     {
         if (cell.column().isSimple())
-            return new BTreeRow(clustering, Columns.of(cell.column()), LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(cell), minDeletionTime(cell));
+            return new BTreeRow(clustering, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(cell), minDeletionTime(cell));
 
         ComplexColumnData complexData = new ComplexColumnData(cell.column(), new Cell[]{ cell }, DeletionTime.LIVE);
-        return new BTreeRow(clustering, Columns.of(cell.column()), LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(complexData), minDeletionTime(cell));
+        return new BTreeRow(clustering, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(complexData), minDeletionTime(cell));
     }
 
     public static BTreeRow emptyDeletedRow(Clustering clustering, DeletionTime deletion)
     {
         assert !deletion.isLive();
-        return new BTreeRow(clustering, Columns.NONE, LivenessInfo.EMPTY, deletion, BTree.empty(), Integer.MIN_VALUE);
+        return new BTreeRow(clustering, LivenessInfo.EMPTY, deletion, BTree.empty(), Integer.MIN_VALUE);
     }
 
     public static BTreeRow noCellLiveRow(Clustering clustering, LivenessInfo primaryKeyLivenessInfo)
     {
         assert !primaryKeyLivenessInfo.isEmpty();
-        return new BTreeRow(clustering, Columns.NONE, primaryKeyLivenessInfo, DeletionTime.LIVE, BTree.empty(), minDeletionTime(primaryKeyLivenessInfo));
+        return new BTreeRow(clustering, primaryKeyLivenessInfo, DeletionTime.LIVE, BTree.empty(), minDeletionTime(primaryKeyLivenessInfo));
     }
 
     private static int minDeletionTime(Cell cell)
@@ -161,9 +159,9 @@ public class BTreeRow extends AbstractRow
         return clustering;
     }
 
-    public Columns columns()
+    public Collection<ColumnDefinition> columns()
     {
-        return columns;
+        return Collections2.transform(this, ColumnData::column);
     }
 
     public Collection<ColumnDefinition> actualColumns()
@@ -209,14 +207,14 @@ public class BTreeRow extends AbstractRow
         return (ComplexColumnData) BTree.<Object>find(btree, ColumnDefinition.asymmetricColumnDataComparator, c);
     }
 
-    public Iterator<ColumnData> iterator()
+    public int size()
     {
-        return searchIterator();
+        return BTree.size(btree);
     }
 
-    public int size()
+    public Iterator<ColumnData> iterator()
     {
-        return BTree.size(btree);
+        return searchIterator();
     }
 
     public Iterable<Cell> cells()
@@ -264,14 +262,21 @@ public class BTreeRow extends AbstractRow
 
             CFMetaData.DroppedColumn dropped = droppedColumns.get(column.name.bytes);
             if (column.isComplex())
-                return ((ComplexColumnData)cd).filter(filter, mayHaveShadowed ? activeDeletion : DeletionTime.LIVE, dropped);
+                return ((ComplexColumnData) cd).filter(filter, mayHaveShadowed ? activeDeletion : DeletionTime.LIVE, dropped);
 
-            Cell cell = (Cell)cd;
+            Cell cell = (Cell) cd;
             return (dropped == null || cell.timestamp() > dropped.droppedTime) && !(mayHaveShadowed && activeDeletion.deletes(cell))
                    ? cell : null;
         });
     }
 
+    public boolean hasComplex()
+    {
+        // We start by the end cause we know complex columns sort after the simple ones
+        ColumnData cd = Iterables.getFirst(BTree.<ColumnData>iterable(btree, BTree.Dir.DESC), null);
+        return cd != null && cd.column.isComplex();
+    }
+
     public boolean hasComplexDeletion()
     {
         // We start by the end cause we know complex columns sort before simple ones
@@ -334,7 +339,7 @@ public class BTreeRow extends AbstractRow
             return null;
 
         int minDeletionTime = minDeletionTime(transformed, info, deletion);
-        return new BTreeRow(clustering, columns, info, deletion, transformed, minDeletionTime);
+        return new BTreeRow(clustering, info, deletion, transformed, minDeletionTime);
     }
 
     public int dataSize()
@@ -359,14 +364,14 @@ public class BTreeRow extends AbstractRow
         return heapSize;
     }
 
-    public static Row.Builder sortedBuilder(Columns columns)
+    public static Row.Builder sortedBuilder()
     {
-        return new Builder(columns, true);
+        return new Builder(true);
     }
 
-    public static Row.Builder unsortedBuilder(Columns columns, int nowInSec)
+    public static Row.Builder unsortedBuilder(int nowInSec)
     {
-        return new Builder(columns, false, nowInSec);
+        return new Builder(false, nowInSec);
     }
 
     // This is only used by PartitionUpdate.CounterMark but other uses should be avoided as much as possible as it breaks our general
@@ -521,8 +526,6 @@ public class BTreeRow extends AbstractRow
             }
 
         };
-        protected final Columns columns;
-
         protected Clustering clustering;
         protected LivenessInfo primaryKeyLivenessInfo = LivenessInfo.EMPTY;
         protected DeletionTime deletion = DeletionTime.LIVE;
@@ -534,14 +537,13 @@ public class BTreeRow extends AbstractRow
 
         // For complex column at index i of 'columns', we store at complexDeletions[i] its complex deletion.
 
-        protected Builder(Columns columns, boolean isSorted)
+        protected Builder(boolean isSorted)
         {
-            this(columns, isSorted, Integer.MIN_VALUE);
+            this(isSorted, Integer.MIN_VALUE);
         }
 
-        protected Builder(Columns columns, boolean isSorted, int nowInSecs)
+        protected Builder(boolean isSorted, int nowInSecs)
         {
-            this.columns = columns;
             this.cells = BTree.builder(ColumnData.comparator);
             resolver = new CellResolver(nowInSecs);
             this.isSorted = isSorted;
@@ -605,7 +607,7 @@ public class BTreeRow extends AbstractRow
                 cells.resolve(resolver);
             Object[] btree = cells.build();
             int minDeletionTime = minDeletionTime(btree, primaryKeyLivenessInfo, deletion);
-            Row row = new BTreeRow(clustering, columns, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
+            Row row = new BTreeRow(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
             reset();
             return row;
         }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/Row.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/Row.java b/src/java/org/apache/cassandra/db/rows/Row.java
index 003dd04..1b18b38 100644
--- a/src/java/org/apache/cassandra/db/rows/Row.java
+++ b/src/java/org/apache/cassandra/db/rows/Row.java
@@ -60,7 +60,7 @@ public interface Row extends Unfiltered, Collection<ColumnData>
      *
      * @return a superset of the columns contained in this row.
      */
-    public Columns columns();
+    public Collection<ColumnDefinition> columns();
 
     /**
      * The row deletion.
@@ -163,6 +163,11 @@ public interface Row extends Unfiltered, Collection<ColumnData>
     public boolean hasComplexDeletion();
 
     /**
+     * Whether the row stores any (non-RT) data for any complex column.
+     */
+    boolean hasComplex();
+
+    /**
      * Whether the row has any deletion info (row deletion, cell tombstone, expired cell or complex deletion).
      *
      * @param nowInSec the current time in seconds to decid if a cell is expired.
@@ -312,7 +317,6 @@ public interface Row extends Unfiltered, Collection<ColumnData>
      */
     public static class Merger
     {
-        private final Columns columns;
         private final Row[] rows;
         private final List<Iterator<ColumnData>> columnDataIterators;
 
@@ -323,12 +327,11 @@ public interface Row extends Unfiltered, Collection<ColumnData>
         private final List<ColumnData> dataBuffer = new ArrayList<>();
         private final ColumnDataReducer columnDataReducer;
 
-        public Merger(int size, int nowInSec, Columns columns)
+        public Merger(int size, int nowInSec, boolean hasComplex)
         {
-            this.columns = columns;
             this.rows = new Row[size];
             this.columnDataIterators = new ArrayList<>(size);
-            this.columnDataReducer = new ColumnDataReducer(size, nowInSec, columns.hasComplex());
+            this.columnDataReducer = new ColumnDataReducer(size, nowInSec, hasComplex);
         }
 
         public void clear()
@@ -395,7 +398,7 @@ public interface Row extends Unfiltered, Collection<ColumnData>
             // Because some data might have been shadowed by the 'activeDeletion', we could have an empty row
             return rowInfo.isEmpty() && rowDeletion.isLive() && dataBuffer.isEmpty()
                  ? null
-                 : BTreeRow.create(clustering, columns, rowInfo, rowDeletion, BTree.build(dataBuffer, UpdateFunction.<ColumnData>noOp()));
+                 : BTreeRow.create(clustering, rowInfo, rowDeletion, BTree.build(dataBuffer, UpdateFunction.<ColumnData>noOp()));
         }
 
         public Clustering mergedClustering()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/Rows.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/Rows.java b/src/java/org/apache/cassandra/db/rows/Rows.java
index ce177f2..bf9ed5e 100644
--- a/src/java/org/apache/cassandra/db/rows/Rows.java
+++ b/src/java/org/apache/cassandra/db/rows/Rows.java
@@ -17,9 +17,7 @@
  */
 package org.apache.cassandra.db.rows;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import java.util.*;
 
 import com.google.common.collect.Iterators;
 import com.google.common.collect.PeekingIterator;
@@ -27,27 +25,13 @@ import com.google.common.collect.PeekingIterator;
 import org.apache.cassandra.config.ColumnDefinition;
 import org.apache.cassandra.db.*;
 import org.apache.cassandra.db.partitions.PartitionStatisticsCollector;
-import org.apache.cassandra.utils.SearchIterator;
+import org.apache.cassandra.utils.MergeIterator;
 
 /**
  * Static utilities to work on Row objects.
  */
 public abstract class Rows
 {
-    // TODO: we could have a that in a more generic place...
-    private static final SearchIterator<ColumnDefinition, ColumnData> EMPTY_SEARCH_ITERATOR = new SearchIterator<ColumnDefinition, ColumnData>()
-    {
-        public boolean hasNext()
-        {
-            return false;
-        }
-
-        public ColumnData next(ColumnDefinition column)
-        {
-            return null;
-        }
-    };
-
     private Rows() {}
 
     public static final Row EMPTY_STATIC_ROW = BTreeRow.emptyRow(Clustering.STATIC_CLUSTERING);
@@ -123,12 +107,11 @@ public abstract class Rows
      * each input and {@code merged} to {@code diffListener}.
      *
      * @param merged the result of merging {@code inputs}.
-     * @param columns a superset of all the columns in any of {@code merged}/{@code inputs}.
      * @param inputs the inputs whose merge yielded {@code merged}.
      * @param diffListener the listener to which to signal the differences between the inputs and the merged
      * result.
      */
-    public static void diff(RowDiffListener diffListener, Row merged, Columns columns, Row...inputs)
+    public static void diff(RowDiffListener diffListener, Row merged, Row...inputs)
     {
         Clustering clustering = merged.clustering();
         LivenessInfo mergedInfo = merged.primaryKeyLivenessInfo().isEmpty() ? null : merged.primaryKeyLivenessInfo();
@@ -145,81 +128,96 @@ public abstract class Rows
                 diffListener.onDeletion(i, clustering, mergedDeletion, inputDeletion);
         }
 
-        SearchIterator<ColumnDefinition, ColumnData> mergedIterator = merged.searchIterator();
-        List<SearchIterator<ColumnDefinition, ColumnData>> inputIterators = new ArrayList<>(inputs.length);
-
+        List<Iterator<ColumnData>> inputIterators = new ArrayList<>(1 + inputs.length);
+        inputIterators.add(merged.iterator());
         for (Row row : inputs)
-            inputIterators.add(row == null ? EMPTY_SEARCH_ITERATOR : row.searchIterator());
+            inputIterators.add(row == null ? Collections.emptyIterator() : row.iterator());
 
-        Iterator<ColumnDefinition> simpleColumns = columns.simpleColumns();
-        while (simpleColumns.hasNext())
+        Iterator<?> iter = MergeIterator.get(inputIterators, ColumnData.comparator, new MergeIterator.Reducer<ColumnData, Object>()
         {
-            ColumnDefinition column = simpleColumns.next();
-            Cell mergedCell = (Cell)mergedIterator.next(column);
-            for (int i = 0; i < inputs.length; i++)
+            ColumnData mergedData;
+            ColumnData[] inputDatas = new ColumnData[inputs.length];
+            public void reduce(int idx, ColumnData current)
             {
-                Cell inputCell = (Cell)inputIterators.get(i).next(column);
-                if (mergedCell != null || inputCell != null)
-                    diffListener.onCell(i, clustering, mergedCell, inputCell);
+                if (idx == 0)
+                    mergedData = current;
+                else
+                    inputDatas[idx - 1] = current;
             }
-        }
 
-        Iterator<ColumnDefinition> complexColumns = columns.complexColumns();
-        while (complexColumns.hasNext())
-        {
-            ColumnDefinition column = complexColumns.next();
-            ComplexColumnData mergedData = (ComplexColumnData)mergedIterator.next(column);
-            // Doing one input at a time is not the most efficient, but it's a lot simpler for now
-            for (int i = 0; i < inputs.length; i++)
+            protected Object getReduced()
             {
-                ComplexColumnData inputData = (ComplexColumnData)inputIterators.get(i).next(column);
-                if (mergedData == null)
-                {
-                    if (inputData == null)
-                        continue;
-
-                    // Everything in inputData has been shadowed
-                    if (!inputData.complexDeletion().isLive())
-                        diffListener.onComplexDeletion(i, clustering, column, null, inputData.complexDeletion());
-                    for (Cell inputCell : inputData)
-                        diffListener.onCell(i, clustering, null, inputCell);
-                }
-                else if (inputData == null)
-                {
-                    // Everything in inputData is new
-                    if (!mergedData.complexDeletion().isLive())
-                        diffListener.onComplexDeletion(i, clustering, column, mergedData.complexDeletion(), null);
-                    for (Cell mergedCell : mergedData)
-                        diffListener.onCell(i, clustering, mergedCell, null);
-                }
-                else
+                for (int i = 0 ; i != inputDatas.length ; i++)
                 {
-                    PeekingIterator<Cell> mergedCells = Iterators.peekingIterator(mergedData.iterator());
-                    PeekingIterator<Cell> inputCells = Iterators.peekingIterator(inputData.iterator());
-                    while (mergedCells.hasNext() && inputCells.hasNext())
+                    ColumnData input = inputDatas[i];
+                    if (mergedData != null || input != null)
                     {
-                        int cmp = column.cellPathComparator().compare(mergedCells.peek().path(), inputCells.peek().path());
-                        if (cmp == 0)
-                            diffListener.onCell(i, clustering, mergedCells.next(), inputCells.next());
-                        else if (cmp < 0)
-                            diffListener.onCell(i, clustering, mergedCells.next(), null);
-                        else // cmp > 0
-                            diffListener.onCell(i, clustering, null, inputCells.next());
+                        ColumnDefinition column = (mergedData != null ? mergedData : input).column;
+                        if (column.isSimple())
+                        {
+                            diffListener.onCell(i, clustering, (Cell) mergedData, (Cell) input);
+                        }
+                        else
+                        {
+                            ComplexColumnData mergedData = (ComplexColumnData) this.mergedData;
+                            ComplexColumnData inputData = (ComplexColumnData) input;
+                            if (mergedData == null)
+                            {
+                                // Everything in inputData has been shadowed
+                                if (!inputData.complexDeletion().isLive())
+                                    diffListener.onComplexDeletion(i, clustering, column, null, inputData.complexDeletion());
+                                for (Cell inputCell : inputData)
+                                    diffListener.onCell(i, clustering, null, inputCell);
+                            }
+                            else if (inputData == null)
+                            {
+                                // Everything in mergedData is new to this input (the input has no data for this column)
+                                if (!mergedData.complexDeletion().isLive())
+                                    diffListener.onComplexDeletion(i, clustering, column, mergedData.complexDeletion(), null);
+                                for (Cell mergedCell : mergedData)
+                                    diffListener.onCell(i, clustering, mergedCell, null);
+                            }
+                            else
+                            {
+                                PeekingIterator<Cell> mergedCells = Iterators.peekingIterator(mergedData.iterator());
+                                PeekingIterator<Cell> inputCells = Iterators.peekingIterator(inputData.iterator());
+                                while (mergedCells.hasNext() && inputCells.hasNext())
+                                {
+                                    int cmp = column.cellPathComparator().compare(mergedCells.peek().path(), inputCells.peek().path());
+                                    if (cmp == 0)
+                                        diffListener.onCell(i, clustering, mergedCells.next(), inputCells.next());
+                                    else if (cmp < 0)
+                                        diffListener.onCell(i, clustering, mergedCells.next(), null);
+                                    else // cmp > 0
+                                        diffListener.onCell(i, clustering, null, inputCells.next());
+                                }
+                                while (mergedCells.hasNext())
+                                    diffListener.onCell(i, clustering, mergedCells.next(), null);
+                                while (inputCells.hasNext())
+                                    diffListener.onCell(i, clustering, null, inputCells.next());
+                            }
+                        }
                     }
-                    while (mergedCells.hasNext())
-                        diffListener.onCell(i, clustering, mergedCells.next(), null);
-                    while (inputCells.hasNext())
-                        diffListener.onCell(i, clustering, null, inputCells.next());
+
                 }
+                return null;
             }
-        }
+
+            protected void onKeyChange()
+            {
+                mergedData = null;
+                Arrays.fill(inputDatas, null);
+            }
+        });
+
+        while (iter.hasNext())
+            iter.next();
     }
 
     public static Row merge(Row row1, Row row2, int nowInSec)
     {
-        Columns mergedColumns = row1.columns().mergeTo(row2.columns());
-        Row.Builder builder = BTreeRow.sortedBuilder(mergedColumns);
-        merge(row1, row2, mergedColumns, builder, nowInSec);
+        Row.Builder builder = BTreeRow.sortedBuilder();
+        merge(row1, row2, builder, nowInSec);
         return builder.build();
     }
 
@@ -227,7 +225,6 @@ public abstract class Rows
     // Return the minimum timestamp delta between existing and update
     public static long merge(Row existing,
                              Row update,
-                             Columns mergedColumns,
                              Row.Builder builder,
                              int nowInSec)
     {
@@ -248,37 +245,42 @@ public abstract class Rows
         builder.addPrimaryKeyLivenessInfo(mergedInfo);
         builder.addRowDeletion(deletion);
 
-        for (int i = 0; i < mergedColumns.simpleColumnCount(); i++)
+        Iterator<ColumnData> a = existing.iterator();
+        Iterator<ColumnData> b = update.iterator();
+        ColumnData nexta = a.hasNext() ? a.next() : null, nextb = b.hasNext() ? b.next() : null;
+        while (nexta != null | nextb != null)
         {
-            ColumnDefinition c = mergedColumns.getSimple(i);
-            Cell existingCell = existing.getCell(c);
-            Cell updateCell = update.getCell(c);
-            timeDelta = Math.min(timeDelta, Cells.reconcile(existingCell,
-                                                            updateCell,
-                                                            deletion,
-                                                            builder,
-                                                            nowInSec));
-        }
+            int comparison = nexta == null ? 1 : nextb == null ? -1 : nexta.column.compareTo(nextb.column);
+            ColumnData cura = comparison <= 0 ? nexta : null;
+            ColumnData curb = comparison >= 0 ? nextb : null;
+            ColumnDefinition column = (cura != null ? cura : curb).column;
+            if (column.isSimple())
+            {
+                timeDelta = Math.min(timeDelta, Cells.reconcile((Cell) cura, (Cell) curb, deletion, builder, nowInSec));
+            }
+            else
+            {
+                ComplexColumnData existingData = (ComplexColumnData) cura;
+                ComplexColumnData updateData = (ComplexColumnData) curb;
 
-        for (int i = 0; i < mergedColumns.complexColumnCount(); i++)
-        {
-            ColumnDefinition c = mergedColumns.getComplex(i);
-            ComplexColumnData existingData = existing.getComplexColumnData(c);
-            ComplexColumnData updateData = update.getComplexColumnData(c);
+                DeletionTime existingDt = existingData == null ? DeletionTime.LIVE : existingData.complexDeletion();
+                DeletionTime updateDt = updateData == null ? DeletionTime.LIVE : updateData.complexDeletion();
+                DeletionTime maxDt = existingDt.supersedes(updateDt) ? existingDt : updateDt;
+                if (maxDt.supersedes(deletion))
+                    builder.addComplexDeletion(column, maxDt);
+                else
+                    maxDt = deletion;
 
-            DeletionTime existingDt = existingData == null ? DeletionTime.LIVE : existingData.complexDeletion();
-            DeletionTime updateDt = updateData == null ? DeletionTime.LIVE : updateData.complexDeletion();
-            DeletionTime maxDt = existingDt.supersedes(updateDt) ? existingDt : updateDt;
-            if (maxDt.supersedes(deletion))
-                builder.addComplexDeletion(c, maxDt);
-            else
-                maxDt = deletion;
+                Iterator<Cell> existingCells = existingData == null ? null : existingData.iterator();
+                Iterator<Cell> updateCells = updateData == null ? null : updateData.iterator();
+                timeDelta = Math.min(timeDelta, Cells.reconcileComplex(column, existingCells, updateCells, maxDt, builder, nowInSec));
+            }
 
-            Iterator<Cell> existingCells = existingData == null ? null : existingData.iterator();
-            Iterator<Cell> updateCells = updateData == null ? null : updateData.iterator();
-            timeDelta = Math.min(timeDelta, Cells.reconcileComplex(c, existingCells, updateCells, maxDt, builder, nowInSec));
+            if (cura != null)
+                nexta = a.hasNext() ? a.next() : null;
+            if (curb != null)
+                nextb = b.hasNext() ? b.next() : null;
         }
-
         return timeDelta;
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
index 2102534..e1b2c09 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
@@ -204,7 +204,7 @@ public class UnfilteredRowIteratorSerializer
         final SerializationHeader sHeader = header.sHeader;
         return new AbstractUnfilteredRowIterator(metadata, header.key, header.partitionDeletion, sHeader.columns(), header.staticRow, header.isReversed, sHeader.stats())
         {
-            private final Row.Builder builder = BTreeRow.sortedBuilder(sHeader.columns().regulars);
+            private final Row.Builder builder = BTreeRow.sortedBuilder();
 
             protected Unfiltered computeNext()
             {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
index 477eac9..e251670 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
@@ -48,7 +48,7 @@ public abstract class UnfilteredRowIterators
     {
         public void onMergedPartitionLevelDeletion(DeletionTime mergedDeletion, DeletionTime[] versions);
 
-        public void onMergedRows(Row merged, Columns columns, Row[] versions);
+        public void onMergedRows(Row merged, Row[] versions);
         public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions);
 
         public void close();
@@ -100,7 +100,7 @@ public abstract class UnfilteredRowIterators
     public static UnfilteredRowIterator noRowsIterator(final CFMetaData cfm, final DecoratedKey partitionKey, final Row staticRow, final DeletionTime partitionDeletion, final boolean isReverseOrder)
     {
         PartitionColumns columns = staticRow == null ? PartitionColumns.NONE
-                                                     : new PartitionColumns(staticRow.columns(), Columns.NONE);
+                                                     : new PartitionColumns(Columns.from(staticRow.columns()), Columns.NONE);
         return new UnfilteredRowIterator()
         {
             public CFMetaData metadata()
@@ -241,7 +241,7 @@ public abstract class UnfilteredRowIterators
             @Override
             protected Row computeNextStatic(Row row)
             {
-                Row.Builder staticBuilder = allocator.cloningBTreeRowBuilder(columns().statics);
+                Row.Builder staticBuilder = allocator.cloningBTreeRowBuilder();
                 return Rows.copy(row, staticBuilder).build();
             }
 
@@ -249,7 +249,7 @@ public abstract class UnfilteredRowIterators
             protected Row computeNext(Row row)
             {
                 if (regularBuilder == null)
-                    regularBuilder = allocator.cloningBTreeRowBuilder(columns().regulars);
+                    regularBuilder = allocator.cloningBTreeRowBuilder();
 
                 return Rows.copy(row, regularBuilder).build();
             }
@@ -377,7 +377,7 @@ public abstract class UnfilteredRowIterators
                   iterators.get(0).partitionKey(),
                   partitionDeletion,
                   columns,
-                  mergeStaticRows(metadata, iterators, columns.statics, nowInSec, listener, partitionDeletion),
+                  mergeStaticRows(iterators, columns.statics, nowInSec, listener, partitionDeletion),
                   reversed,
                   mergeStats(iterators));
 
@@ -450,8 +450,7 @@ public abstract class UnfilteredRowIterators
             return delTime;
         }
 
-        private static Row mergeStaticRows(CFMetaData metadata,
-                                           List<UnfilteredRowIterator> iterators,
+        private static Row mergeStaticRows(List<UnfilteredRowIterator> iterators,
                                            Columns columns,
                                            int nowInSec,
                                            MergeListener listener,
@@ -463,7 +462,7 @@ public abstract class UnfilteredRowIterators
             if (iterators.stream().allMatch(iter -> iter.staticRow().isEmpty()))
                 return Rows.EMPTY_STATIC_ROW;
 
-            Row.Merger merger = new Row.Merger(iterators.size(), nowInSec, columns);
+            Row.Merger merger = new Row.Merger(iterators.size(), nowInSec, columns.hasComplex());
             for (int i = 0; i < iterators.size(); i++)
                 merger.add(i, iterators.get(i).staticRow());
 
@@ -471,7 +470,7 @@ public abstract class UnfilteredRowIterators
             if (merged == null)
                 merged = Rows.EMPTY_STATIC_ROW;
             if (listener != null)
-                listener.onMergedRows(merged, columns, merger.mergedRows());
+                listener.onMergedRows(merged, merger.mergedRows());
             return merged;
         }
 
@@ -530,7 +529,7 @@ public abstract class UnfilteredRowIterators
 
             private MergeReducer(int size, boolean reversed, int nowInSec, MergeListener listener)
             {
-                this.rowMerger = new Row.Merger(size, nowInSec, columns().regulars);
+                this.rowMerger = new Row.Merger(size, nowInSec, columns().regulars.hasComplex());
                 this.markerMerger = new RangeTombstoneMarker.Merger(size, partitionLevelDeletion(), reversed);
                 this.listener = listener;
             }
@@ -557,7 +556,7 @@ public abstract class UnfilteredRowIterators
                 {
                     Row merged = rowMerger.merge(markerMerger.activeDeletion());
                     if (listener != null)
-                        listener.onMergedRows(merged == null ? BTreeRow.emptyRow(rowMerger.mergedClustering()) : merged, columns().regulars, rowMerger.mergedRows());
+                        listener.onMergedRows(merged == null ? BTreeRow.emptyRow(rowMerger.mergedClustering()) : merged, rowMerger.mergedRows());
                     return merged;
                 }
                 else

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
index 60bd06f..605a67b 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
@@ -287,7 +287,7 @@ public class UnfilteredSerializer
     {
         int flags = in.readUnsignedByte();
         assert !isEndOfPartition(flags) && kind(flags) == Unfiltered.Kind.ROW && isStatic(flags) : flags;
-        Row.Builder builder = BTreeRow.sortedBuilder(helper.fetchedStaticColumns(header));
+        Row.Builder builder = BTreeRow.sortedBuilder();
         builder.newRow(Clustering.STATIC_CLUSTERING);
         return deserializeRowBody(in, header, helper, flags, builder);
     }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/view/MaterializedView.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/view/MaterializedView.java b/src/java/org/apache/cassandra/db/view/MaterializedView.java
index 0f6cf06..5128b7c 100644
--- a/src/java/org/apache/cassandra/db/view/MaterializedView.java
+++ b/src/java/org/apache/cassandra/db/view/MaterializedView.java
@@ -268,7 +268,7 @@ public class MaterializedView
                                             int nowInSec)
     {
         CFMetaData viewCfm = getViewCfs().metadata;
-        Row.Builder builder = BTreeRow.unsortedBuilder(viewCfm.partitionColumns().regulars, nowInSec);
+        Row.Builder builder = BTreeRow.unsortedBuilder(nowInSec);
         builder.newRow(viewClustering(temporalRow, resolver));
         builder.addRowDeletion(deletionTime);
         return PartitionUpdate.singleRowUpdate(viewCfm, partitionKey, builder.build());
@@ -286,7 +286,7 @@ public class MaterializedView
     {
 
         CFMetaData viewCfm = getViewCfs().metadata;
-        Row.Builder builder = BTreeRow.unsortedBuilder(viewCfm.partitionColumns().regulars, nowInSec);
+        Row.Builder builder = BTreeRow.unsortedBuilder(nowInSec);
         builder.newRow(viewClustering(temporalRow, resolver));
         builder.addComplexDeletion(deletedColumn, deletionTime);
         return PartitionUpdate.singleRowUpdate(viewCfm, partitionKey, builder.build());
@@ -363,7 +363,7 @@ public class MaterializedView
             return null;
         }
 
-        Row.Builder regularBuilder = BTreeRow.unsortedBuilder(viewCfs.metadata.partitionColumns().regulars, temporalRow.nowInSec);
+        Row.Builder regularBuilder = BTreeRow.unsortedBuilder(temporalRow.nowInSec);
 
         CBuilder clustering = CBuilder.create(viewCfs.getComparator());
         for (int i = 0; i < viewCfs.metadata.clusteringColumns().size(); i++)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/SecondaryIndexManager.java b/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
index 6f30305..fabfebc 100644
--- a/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
+++ b/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
@@ -740,9 +740,9 @@ public class SecondaryIndexManager implements IndexRegistry
 
         public void onUpdated(Row existing, Row updated)
         {
-            final Row.Builder toRemove = BTreeRow.sortedBuilder(existing.columns());
+            final Row.Builder toRemove = BTreeRow.sortedBuilder();
             toRemove.newRow(existing.clustering());
-            final Row.Builder toInsert = BTreeRow.sortedBuilder(updated.columns());
+            final Row.Builder toInsert = BTreeRow.sortedBuilder();
             toInsert.newRow(updated.clustering());
             // diff listener collates the columns to be added & removed from the indexes
             RowDiffListener diffListener = new RowDiffListener()
@@ -771,7 +771,7 @@ public class SecondaryIndexManager implements IndexRegistry
 
                 }
             };
-            Rows.diff(diffListener, updated, updated.columns().mergeTo(existing.columns()), existing);
+            Rows.diff(diffListener, updated, existing);
             Row oldRow = toRemove.build();
             Row newRow = toInsert.build();
             for (Index.Indexer indexer : indexers)
@@ -834,7 +834,7 @@ public class SecondaryIndexManager implements IndexRegistry
                 rows = new Row[versions];
         }
 
-        public void onRowMerge(Columns columns, Row merged, Row...versions)
+        public void onRowMerge(Row merged, Row...versions)
         {
             // Diff listener constructs rows representing deltas between the merged and original versions
             // These delta rows are then passed to registered indexes for removal processing
@@ -859,7 +859,7 @@ public class SecondaryIndexManager implements IndexRegistry
                     {
                         if (builders[i] == null)
                         {
-                            builders[i] = BTreeRow.sortedBuilder(columns);
+                            builders[i] = BTreeRow.sortedBuilder();
                             builders[i].newRow(clustering);
                         }
                         builders[i].addCell(original);
@@ -867,7 +867,7 @@ public class SecondaryIndexManager implements IndexRegistry
                 }
             };
 
-            Rows.diff(diffListener, merged, columns, versions);
+            Rows.diff(diffListener, merged, versions);
 
             for(int i = 0; i < builders.length; i++)
                 if (builders[i] != null)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java b/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
index a9fbf41..f2436af 100644
--- a/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
+++ b/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
@@ -18,7 +18,6 @@
 
 package org.apache.cassandra.index.transactions;
 
-import org.apache.cassandra.db.Columns;
 import org.apache.cassandra.db.rows.Row;
 
 /**
@@ -33,12 +32,12 @@ import org.apache.cassandra.db.rows.Row;
  */
 public interface CompactionTransaction extends IndexTransaction
 {
-    void onRowMerge(Columns columns, Row merged, Row...versions);
+    void onRowMerge(Row merged, Row...versions);
 
     CompactionTransaction NO_OP = new CompactionTransaction()
     {
         public void start(){}
-        public void onRowMerge(Columns columns, Row merged, Row...versions){}
+        public void onRowMerge(Row merged, Row...versions){}
         public void commit(){}
     };
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
index 7382e5e..365d469 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
@@ -71,7 +71,7 @@ public abstract class SSTableSimpleIterator extends AbstractIterator<Unfiltered>
         {
             super(metadata, in, helper);
             this.header = header;
-            this.builder = BTreeRow.sortedBuilder(helper.fetchedRegularColumns(header));
+            this.builder = BTreeRow.sortedBuilder();
         }
 
         public Row readStaticRow() throws IOException

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/service/DataResolver.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/DataResolver.java b/src/java/org/apache/cassandra/service/DataResolver.java
index cd69ef3..0eb13c3 100644
--- a/src/java/org/apache/cassandra/service/DataResolver.java
+++ b/src/java/org/apache/cassandra/service/DataResolver.java
@@ -212,7 +212,7 @@ public class DataResolver extends ResponseResolver
             {
                 if (currentRows[i] == null)
                 {
-                    currentRows[i] = BTreeRow.sortedBuilder(clustering == Clustering.STATIC_CLUSTERING ? columns.statics : columns.regulars);
+                    currentRows[i] = BTreeRow.sortedBuilder();
                     currentRows[i].newRow(clustering);
                 }
                 return currentRows[i];
@@ -227,7 +227,7 @@ public class DataResolver extends ResponseResolver
                 }
             }
 
-            public void onMergedRows(Row merged, Columns columns, Row[] versions)
+            public void onMergedRows(Row merged, Row[] versions)
             {
                 // If a row was shadowed post merged, it must be by a partition level or range tombstone, and we handle
                 // those case directly in their respective methods (in other words, it would be inefficient to send a row
@@ -235,7 +235,7 @@ public class DataResolver extends ResponseResolver
                 if (merged.isEmpty())
                     return;
 
-                Rows.diff(diffListener, merged, columns, versions);
+                Rows.diff(diffListener, merged, versions);
                 for (int i = 0; i < currentRows.length; i++)
                 {
                     if (currentRows[i] != null)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java b/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
index 084e835..72e4399 100644
--- a/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
+++ b/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
@@ -112,14 +112,14 @@ public class ThriftResultsMerger extends WrappingUnfilteredPartitionIterator
             super(results);
             assert results.metadata().isStaticCompactTable();
             this.nowInSec = nowInSec;
-            this.builder = BTreeRow.sortedBuilder(results.columns().regulars);
+            this.builder = BTreeRow.sortedBuilder();
         }
 
         private void init()
         {
             assert !isInit;
             Row staticRow = super.staticRow();
-            assert staticRow.columns().complexColumnCount() == 0;
+            assert !staticRow.hasComplex();
 
             staticCells = staticRow.cells().iterator();
             updateNextToMerge();
@@ -220,7 +220,7 @@ public class ThriftResultsMerger extends WrappingUnfilteredPartitionIterator
             this.superColumnMapColumn = results.metadata().compactValueColumn();
             assert superColumnMapColumn != null && superColumnMapColumn.type instanceof MapType;
 
-            this.builder = BTreeRow.sortedBuilder(Columns.of(superColumnMapColumn));
+            this.builder = BTreeRow.sortedBuilder();
             this.columnComparator = ((MapType)superColumnMapColumn.type).nameComparator();
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java b/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
index a76732f..9066335 100644
--- a/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
@@ -46,18 +46,18 @@ public abstract class AbstractAllocator
 
     public abstract ByteBuffer allocate(int size);
 
-    public Row.Builder cloningBTreeRowBuilder(Columns columns)
+    public Row.Builder cloningBTreeRowBuilder()
     {
-        return new CloningBTreeRowBuilder(columns, this);
+        return new CloningBTreeRowBuilder(this);
     }
 
     private static class CloningBTreeRowBuilder extends BTreeRow.Builder
     {
         private final AbstractAllocator allocator;
 
-        private CloningBTreeRowBuilder(Columns columns, AbstractAllocator allocator)
+        private CloningBTreeRowBuilder(AbstractAllocator allocator)
         {
-            super(columns, true);
+            super(true);
             this.allocator = allocator;
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java b/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
index 15499ae..588b433 100644
--- a/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
@@ -59,7 +59,7 @@ public abstract class MemtableAllocator
         this.offHeap = offHeap;
     }
 
-    public abstract Row.Builder rowBuilder(CFMetaData metadata, OpOrder.Group opGroup, boolean isStatic);
+    public abstract Row.Builder rowBuilder(OpOrder.Group opGroup);
     public abstract DecoratedKey clone(DecoratedKey key, OpOrder.Group opGroup);
     public abstract DataReclaimer reclaimer();
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java b/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
index 8205f3b..fb35b38 100644
--- a/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
@@ -31,10 +31,9 @@ public abstract class MemtableBufferAllocator extends MemtableAllocator
         super(onHeap, offHeap);
     }
 
-    public Row.Builder rowBuilder(CFMetaData metadata, OpOrder.Group writeOp, boolean isStatic)
+    public Row.Builder rowBuilder(OpOrder.Group writeOp)
     {
-        Columns columns = isStatic ? metadata.partitionColumns().statics : metadata.partitionColumns().regulars;
-        return allocator(writeOp).cloningBTreeRowBuilder(columns);
+        return allocator(writeOp).cloningBTreeRowBuilder();
     }
 
     public DecoratedKey clone(DecoratedKey key, OpOrder.Group writeOp)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java b/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
index 7b95430..e5458b4 100644
--- a/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
@@ -53,7 +53,7 @@ public class NativeAllocator extends MemtableAllocator
         super(pool.onHeap.newAllocator(), pool.offHeap.newAllocator());
     }
 
-    public Row.Builder rowBuilder(CFMetaData metadata, OpOrder.Group opGroup, boolean isStatic)
+    public Row.Builder rowBuilder(OpOrder.Group opGroup)
     {
         // TODO
         throw new UnsupportedOperationException();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/test/unit/org/apache/cassandra/db/RowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/RowTest.java b/test/unit/org/apache/cassandra/db/RowTest.java
index cd80a2f..0af183c 100644
--- a/test/unit/org/apache/cassandra/db/RowTest.java
+++ b/test/unit/org/apache/cassandra/db/RowTest.java
@@ -127,7 +127,7 @@ public class RowTest
         ColumnDefinition defA = cfm.getColumnDefinition(new ColumnIdentifier("a", true));
         ColumnDefinition defB = cfm.getColumnDefinition(new ColumnIdentifier("b", true));
 
-        Row.Builder builder = BTreeRow.unsortedBuilder(cfm.partitionColumns().regulars, nowInSeconds);
+        Row.Builder builder = BTreeRow.unsortedBuilder(nowInSeconds);
         builder.newRow(cfm.comparator.make("c1"));
         writeSimpleCellValue(builder, cfm, defA, "a1", 0);
         writeSimpleCellValue(builder, cfm, defA, "a2", 1);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java b/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
index 09a337a..44391c8 100644
--- a/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
+++ b/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
@@ -268,7 +268,7 @@ public class TriggerExecutorTest
 
     private static PartitionUpdate makeCf(CFMetaData metadata, String key, String columnValue1, String columnValue2)
     {
-        Row.Builder builder = BTreeRow.unsortedBuilder(metadata.partitionColumns().regulars, FBUtilities.nowInSeconds());
+        Row.Builder builder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         builder.newRow(Clustering.EMPTY);
         long ts = FBUtilities.timestampMicros();
         if (columnValue1 != null)


[2/3] cassandra git commit: Improve efficiency of Row

Posted by be...@apache.org.
Improve efficiency of Row

Removes need to store a Columns instance within a Row,
avoiding both the cost of merging and storing these.

Improves algorithmic efficiency of Rows.merge by performing
a straightforward linear merge on the present ColumnData.

Improves algorithmic efficiency of Rows.diff by utilising MergeIterator

patch by benedict; reviewed by marcus for CASSANDRA-10193


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/aa576263
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/aa576263
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/aa576263

Branch: refs/heads/trunk
Commit: aa5762633a06e229cd9122983eec94bdb74fc23a
Parents: ce63ccc
Author: Benedict Elliott Smith <be...@apache.org>
Authored: Wed Aug 26 19:02:03 2015 +0100
Committer: Benedict Elliott Smith <be...@apache.org>
Committed: Fri Sep 4 13:47:07 2015 +0100

----------------------------------------------------------------------
 .../apache/cassandra/cql3/UpdateParameters.java |   4 +-
 src/java/org/apache/cassandra/db/Columns.java   |   2 +-
 .../org/apache/cassandra/db/LegacyLayout.java   |   4 +-
 .../apache/cassandra/db/RowUpdateBuilder.java   |   6 +-
 .../cassandra/db/SerializationHeader.java       |   1 -
 .../cassandra/db/UnfilteredDeserializer.java    |   2 +-
 .../db/compaction/CompactionIterator.java       |   4 +-
 .../db/partitions/AtomicBTreePartition.java     |   8 +-
 .../db/partitions/PartitionUpdate.java          |  10 +-
 .../apache/cassandra/db/rows/AbstractRow.java   |   1 -
 .../org/apache/cassandra/db/rows/BTreeRow.java  |  64 +++---
 src/java/org/apache/cassandra/db/rows/Row.java  |  15 +-
 src/java/org/apache/cassandra/db/rows/Rows.java | 214 ++++++++++---------
 .../rows/UnfilteredRowIteratorSerializer.java   |   2 +-
 .../db/rows/UnfilteredRowIterators.java         |  21 +-
 .../cassandra/db/rows/UnfilteredSerializer.java |   2 +-
 .../cassandra/db/view/MaterializedView.java     |   6 +-
 .../cassandra/index/SecondaryIndexManager.java  |  12 +-
 .../transactions/CompactionTransaction.java     |   5 +-
 .../io/sstable/SSTableSimpleIterator.java       |   2 +-
 .../apache/cassandra/service/DataResolver.java  |   6 +-
 .../cassandra/thrift/ThriftResultsMerger.java   |   6 +-
 .../utils/memory/AbstractAllocator.java         |   8 +-
 .../utils/memory/MemtableAllocator.java         |   2 +-
 .../utils/memory/MemtableBufferAllocator.java   |   5 +-
 .../cassandra/utils/memory/NativeAllocator.java |   2 +-
 test/unit/org/apache/cassandra/db/RowTest.java  |   2 +-
 .../cassandra/triggers/TriggerExecutorTest.java |   2 +-
 28 files changed, 210 insertions(+), 208 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/cql3/UpdateParameters.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/UpdateParameters.java b/src/java/org/apache/cassandra/cql3/UpdateParameters.java
index 22aeddd..cd81f84 100644
--- a/src/java/org/apache/cassandra/cql3/UpdateParameters.java
+++ b/src/java/org/apache/cassandra/cql3/UpdateParameters.java
@@ -121,13 +121,13 @@ public class UpdateParameters
         if (clustering == Clustering.STATIC_CLUSTERING)
         {
             if (staticBuilder == null)
-                staticBuilder = BTreeRow.unsortedBuilder(updatedColumns.statics, nowInSec);
+                staticBuilder = BTreeRow.unsortedBuilder(nowInSec);
             builder = staticBuilder;
         }
         else
         {
             if (regularBuilder == null)
-                regularBuilder = BTreeRow.unsortedBuilder(updatedColumns.regulars, nowInSec);
+                regularBuilder = BTreeRow.unsortedBuilder(nowInSec);
             builder = regularBuilder;
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/Columns.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/Columns.java b/src/java/org/apache/cassandra/db/Columns.java
index 46e8401..582b742 100644
--- a/src/java/org/apache/cassandra/db/Columns.java
+++ b/src/java/org/apache/cassandra/db/Columns.java
@@ -86,7 +86,7 @@ public class Columns extends AbstractCollection<ColumnDefinition> implements Col
      * @param s the set from which to create the new {@code Columns}.
      * @return the newly created {@code Columns} containing the columns from {@code s}.
      */
-    public static Columns from(Set<ColumnDefinition> s)
+    public static Columns from(Collection<ColumnDefinition> s)
     {
         Object[] tree = BTree.<ColumnDefinition>builder(Comparator.naturalOrder()).addAll(s).build();
         return new Columns(tree, findFirstComplexIdx(tree));

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/LegacyLayout.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/LegacyLayout.java b/src/java/org/apache/cassandra/db/LegacyLayout.java
index 7b03e46..b6f6657 100644
--- a/src/java/org/apache/cassandra/db/LegacyLayout.java
+++ b/src/java/org/apache/cassandra/db/LegacyLayout.java
@@ -539,7 +539,7 @@ public abstract class LegacyLayout
         for (ColumnDefinition column : statics)
             columnsToFetch.add(column.name.bytes);
 
-        Row.Builder builder = BTreeRow.unsortedBuilder(statics, FBUtilities.nowInSeconds());
+        Row.Builder builder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         builder.newRow(Clustering.STATIC_CLUSTERING);
 
         boolean foundOne = false;
@@ -1058,7 +1058,7 @@ public abstract class LegacyLayout
             // We cannot use a sorted builder because we don't have exactly the same ordering in 3.0 and pre-3.0. More precisely, within a row, we
             // store all simple columns before the complex ones in 3.0, which we use to sort everything sorted by the column name before. Note however
             // that the unsorted builder won't have to reconcile cells, so the exact value we pass for nowInSec doesn't matter.
-            this.builder = BTreeRow.unsortedBuilder(isStatic ? metadata.partitionColumns().statics : metadata.partitionColumns().regulars, FBUtilities.nowInSeconds());
+            this.builder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         }
 
         public static CellGrouper staticGrouper(CFMetaData metadata, SerializationHelper helper)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/RowUpdateBuilder.java b/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
index 65f08b4..8e6ce3e 100644
--- a/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
+++ b/src/java/org/apache/cassandra/db/RowUpdateBuilder.java
@@ -82,7 +82,7 @@ public class RowUpdateBuilder
         assert staticBuilder == null : "Cannot update both static and non-static columns with the same RowUpdateBuilder object";
         assert regularBuilder == null : "Cannot add the clustering twice to the same row";
 
-        regularBuilder = BTreeRow.unsortedBuilder(update.columns().regulars, FBUtilities.nowInSeconds());
+        regularBuilder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         regularBuilder.newRow(clustering);
 
         // If a CQL table, add the "row marker"
@@ -107,7 +107,7 @@ public class RowUpdateBuilder
         assert regularBuilder == null : "Cannot update both static and non-static columns with the same RowUpdateBuilder object";
         if (staticBuilder == null)
         {
-            staticBuilder = BTreeRow.unsortedBuilder(update.columns().statics, FBUtilities.nowInSeconds());
+            staticBuilder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
             staticBuilder.newRow(Clustering.STATIC_CLUSTERING);
         }
         return staticBuilder;
@@ -188,7 +188,7 @@ public class RowUpdateBuilder
         assert clusteringValues.length == update.metadata().comparator.size() || (clusteringValues.length == 0 && !update.columns().statics.isEmpty());
 
         boolean isStatic = clusteringValues.length != update.metadata().comparator.size();
-        Row.Builder builder = BTreeRow.sortedBuilder(isStatic ? update.columns().statics : update.columns().regulars);
+        Row.Builder builder = BTreeRow.sortedBuilder();
 
         if (isStatic)
             builder.newRow(Clustering.STATIC_CLUSTERING);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/SerializationHeader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/SerializationHeader.java b/src/java/org/apache/cassandra/db/SerializationHeader.java
index a840f02..582a6af 100644
--- a/src/java/org/apache/cassandra/db/SerializationHeader.java
+++ b/src/java/org/apache/cassandra/db/SerializationHeader.java
@@ -23,7 +23,6 @@ import java.util.*;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
-import com.google.common.base.Function;
 
 import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.config.ColumnDefinition;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
index 2f75f34..d47da3c 100644
--- a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
+++ b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
@@ -121,7 +121,7 @@ public abstract class UnfilteredDeserializer
             super(metadata, in, helper);
             this.header = header;
             this.clusteringDeserializer = new ClusteringPrefix.Deserializer(metadata.comparator, in, header);
-            this.builder = BTreeRow.sortedBuilder(helper.fetchedRegularColumns(header));
+            this.builder = BTreeRow.sortedBuilder();
         }
 
         public boolean hasNext() throws IOException

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
index 4aaa17a..fe18c04 100644
--- a/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
+++ b/src/java/org/apache/cassandra/db/compaction/CompactionIterator.java
@@ -187,10 +187,10 @@ public class CompactionIterator extends CompactionInfo.Holder implements Unfilte
                     {
                     }
 
-                    public void onMergedRows(Row merged, Columns columns, Row[] versions)
+                    public void onMergedRows(Row merged, Row[] versions)
                     {
                         indexTransaction.start();
-                        indexTransaction.onRowMerge(columns, merged, versions);
+                        indexTransaction.onRowMerge(merged, versions);
                         indexTransaction.commit();
                     }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
index d279a6b..e00a75e 100644
--- a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
+++ b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
@@ -263,10 +263,10 @@ public class AtomicBTreePartition extends AbstractBTreePartition
             boolean isStatic = clustering == Clustering.STATIC_CLUSTERING;
             // We know we only insert/update one static per PartitionUpdate, so no point in saving the builder
             if (isStatic)
-                return allocator.rowBuilder(updating.metadata(), writeOp, true);
+                return allocator.rowBuilder(writeOp);
 
             if (regularBuilder == null)
-                regularBuilder = allocator.rowBuilder(updating.metadata(), writeOp, false);
+                regularBuilder = allocator.rowBuilder(writeOp);
             return regularBuilder;
         }
 
@@ -285,10 +285,8 @@ public class AtomicBTreePartition extends AbstractBTreePartition
 
         public Row apply(Row existing, Row update)
         {
-            Columns mergedColumns = existing.columns().mergeTo(update.columns());
-
             Row.Builder builder = builder(existing.clustering());
-            colUpdateTimeDelta = Math.min(colUpdateTimeDelta, Rows.merge(existing, update, mergedColumns, builder, nowInSec));
+            colUpdateTimeDelta = Math.min(colUpdateTimeDelta, Rows.merge(existing, update, builder, nowInSec));
 
             Row reconciled = builder.build();
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
index 5e056d2..1a27b39 100644
--- a/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
+++ b/src/java/org/apache/cassandra/db/partitions/PartitionUpdate.java
@@ -168,12 +168,12 @@ public class PartitionUpdate extends AbstractBTreePartition
         if (row.isStatic())
         {
             Holder holder = new Holder(BTree.empty(), deletionInfo, row, EncodingStats.NO_STATS);
-            return new PartitionUpdate(metadata, key, new PartitionColumns(row.columns(), Columns.NONE), holder, deletionInfo, false);
+            return new PartitionUpdate(metadata, key, new PartitionColumns(Columns.from(row.columns()), Columns.NONE), holder, deletionInfo, false);
         }
         else
         {
             Holder holder = new Holder(BTree.singleton(row), deletionInfo, Rows.EMPTY_STATIC_ROW, EncodingStats.NO_STATS);
-            return new PartitionUpdate(metadata, key, new PartitionColumns(Columns.NONE, row.columns()), holder, deletionInfo, false);
+            return new PartitionUpdate(metadata, key, new PartitionColumns(Columns.NONE, Columns.from(row.columns())), holder, deletionInfo, false);
         }
     }
 
@@ -528,7 +528,8 @@ public class PartitionUpdate extends AbstractBTreePartition
 
         if (row.isStatic())
         {
-            // We test for == first because in most case it'll be true and that is faster
+            // this assert is expensive, and possibly of limited value; we should consider removing it
+            // or introducing a new class of assertions for test purposes
             assert columns().statics.containsAll(row.columns()) : columns().statics + " is not superset of " + row.columns();
             Row staticRow = holder.staticRow.isEmpty()
                       ? row
@@ -537,7 +538,8 @@ public class PartitionUpdate extends AbstractBTreePartition
         }
         else
         {
-            // We test for == first because in most case it'll be true and that is faster
+            // this assert is expensive, and possibly of limited value; we should consider removing it
+            // or introducing a new class of assertions for test purposes
             assert columns().regulars.containsAll(row.columns()) : columns().regulars + " is not superset of " + row.columns();
             rowBuilder.add(row);
         }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/AbstractRow.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/AbstractRow.java b/src/java/org/apache/cassandra/db/rows/AbstractRow.java
index fca765f..555146e 100644
--- a/src/java/org/apache/cassandra/db/rows/AbstractRow.java
+++ b/src/java/org/apache/cassandra/db/rows/AbstractRow.java
@@ -154,7 +154,6 @@ public abstract class AbstractRow extends AbstractCollection<ColumnData> impleme
 
         Row that = (Row)other;
         if (!this.clustering().equals(that.clustering())
-             || !this.columns().equals(that.columns())
              || !this.primaryKeyLivenessInfo().equals(that.primaryKeyLivenessInfo())
              || !this.deletion().equals(that.deletion()))
             return false;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/BTreeRow.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/BTreeRow.java b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
index ed036af..653ffcd 100644
--- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java
+++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
@@ -23,6 +23,7 @@ import java.util.function.Predicate;
 
 import com.google.common.base.Function;
 import com.google.common.collect.Collections2;
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 
 import org.apache.cassandra.config.CFMetaData;
@@ -36,7 +37,6 @@ import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.ObjectSizes;
 import org.apache.cassandra.utils.btree.BTree;
 import org.apache.cassandra.utils.btree.BTreeSearchIterator;
-import org.apache.cassandra.utils.btree.BTreeSet;
 import org.apache.cassandra.utils.btree.UpdateFunction;
 
 /**
@@ -47,7 +47,6 @@ public class BTreeRow extends AbstractRow
     private static final long EMPTY_SIZE = ObjectSizes.measure(emptyRow(Clustering.EMPTY));
 
     private final Clustering clustering;
-    private final Columns columns;
     private final LivenessInfo primaryKeyLivenessInfo;
     private final DeletionTime deletion;
 
@@ -63,10 +62,9 @@ public class BTreeRow extends AbstractRow
     // no expiring cells, this will be Integer.MAX_VALUE;
     private final int minLocalDeletionTime;
 
-    private BTreeRow(Clustering clustering, Columns columns, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree, int minLocalDeletionTime)
+    private BTreeRow(Clustering clustering, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree, int minLocalDeletionTime)
     {
         this.clustering = clustering;
-        this.columns = columns;
         this.primaryKeyLivenessInfo = primaryKeyLivenessInfo;
         this.deletion = deletion;
         this.btree = btree;
@@ -74,7 +72,7 @@ public class BTreeRow extends AbstractRow
     }
 
     // Note that it's often easier/safer to use the sortedBuilder/unsortedBuilder or one of the static creation method below. Only directly useful in a small amount of cases.
-    public static BTreeRow create(Clustering clustering, Columns columns, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree)
+    public static BTreeRow create(Clustering clustering, LivenessInfo primaryKeyLivenessInfo, DeletionTime deletion, Object[] btree)
     {
         int minDeletionTime = Math.min(minDeletionTime(primaryKeyLivenessInfo), minDeletionTime(deletion));
         if (minDeletionTime != Integer.MIN_VALUE)
@@ -83,33 +81,33 @@ public class BTreeRow extends AbstractRow
                 minDeletionTime = Math.min(minDeletionTime, minDeletionTime(cd));
         }
 
-        return new BTreeRow(clustering, columns, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
+        return new BTreeRow(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
     }
 
     public static BTreeRow emptyRow(Clustering clustering)
     {
-        return new BTreeRow(clustering, Columns.NONE, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.empty(), Integer.MAX_VALUE);
+        return new BTreeRow(clustering, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.empty(), Integer.MAX_VALUE);
     }
 
     public static BTreeRow singleCellRow(Clustering clustering, Cell cell)
     {
         if (cell.column().isSimple())
-            return new BTreeRow(clustering, Columns.of(cell.column()), LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(cell), minDeletionTime(cell));
+            return new BTreeRow(clustering, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(cell), minDeletionTime(cell));
 
         ComplexColumnData complexData = new ComplexColumnData(cell.column(), new Cell[]{ cell }, DeletionTime.LIVE);
-        return new BTreeRow(clustering, Columns.of(cell.column()), LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(complexData), minDeletionTime(cell));
+        return new BTreeRow(clustering, LivenessInfo.EMPTY, DeletionTime.LIVE, BTree.singleton(complexData), minDeletionTime(cell));
     }
 
     public static BTreeRow emptyDeletedRow(Clustering clustering, DeletionTime deletion)
     {
         assert !deletion.isLive();
-        return new BTreeRow(clustering, Columns.NONE, LivenessInfo.EMPTY, deletion, BTree.empty(), Integer.MIN_VALUE);
+        return new BTreeRow(clustering, LivenessInfo.EMPTY, deletion, BTree.empty(), Integer.MIN_VALUE);
     }
 
     public static BTreeRow noCellLiveRow(Clustering clustering, LivenessInfo primaryKeyLivenessInfo)
     {
         assert !primaryKeyLivenessInfo.isEmpty();
-        return new BTreeRow(clustering, Columns.NONE, primaryKeyLivenessInfo, DeletionTime.LIVE, BTree.empty(), minDeletionTime(primaryKeyLivenessInfo));
+        return new BTreeRow(clustering, primaryKeyLivenessInfo, DeletionTime.LIVE, BTree.empty(), minDeletionTime(primaryKeyLivenessInfo));
     }
 
     private static int minDeletionTime(Cell cell)
@@ -161,9 +159,9 @@ public class BTreeRow extends AbstractRow
         return clustering;
     }
 
-    public Columns columns()
+    public Collection<ColumnDefinition> columns()
     {
-        return columns;
+        return Collections2.transform(this, ColumnData::column);
     }
 
     public Collection<ColumnDefinition> actualColumns()
@@ -209,14 +207,14 @@ public class BTreeRow extends AbstractRow
         return (ComplexColumnData) BTree.<Object>find(btree, ColumnDefinition.asymmetricColumnDataComparator, c);
     }
 
-    public Iterator<ColumnData> iterator()
+    public int size()
     {
-        return searchIterator();
+        return BTree.size(btree);
     }
 
-    public int size()
+    public Iterator<ColumnData> iterator()
     {
-        return BTree.size(btree);
+        return searchIterator();
     }
 
     public Iterable<Cell> cells()
@@ -264,14 +262,21 @@ public class BTreeRow extends AbstractRow
 
             CFMetaData.DroppedColumn dropped = droppedColumns.get(column.name.bytes);
             if (column.isComplex())
-                return ((ComplexColumnData)cd).filter(filter, mayHaveShadowed ? activeDeletion : DeletionTime.LIVE, dropped);
+                return ((ComplexColumnData) cd).filter(filter, mayHaveShadowed ? activeDeletion : DeletionTime.LIVE, dropped);
 
-            Cell cell = (Cell)cd;
+            Cell cell = (Cell) cd;
             return (dropped == null || cell.timestamp() > dropped.droppedTime) && !(mayHaveShadowed && activeDeletion.deletes(cell))
                    ? cell : null;
         });
     }
 
+    public boolean hasComplex()
+    {
+        // We start by the end cause we know complex columns sort after the simple ones
+        ColumnData cd = Iterables.getFirst(BTree.<ColumnData>iterable(btree, BTree.Dir.DESC), null);
+        return cd != null && cd.column.isComplex();
+    }
+
     public boolean hasComplexDeletion()
     {
         // We start by the end cause we know complex columns sort before simple ones
@@ -334,7 +339,7 @@ public class BTreeRow extends AbstractRow
             return null;
 
         int minDeletionTime = minDeletionTime(transformed, info, deletion);
-        return new BTreeRow(clustering, columns, info, deletion, transformed, minDeletionTime);
+        return new BTreeRow(clustering, info, deletion, transformed, minDeletionTime);
     }
 
     public int dataSize()
@@ -359,14 +364,14 @@ public class BTreeRow extends AbstractRow
         return heapSize;
     }
 
-    public static Row.Builder sortedBuilder(Columns columns)
+    public static Row.Builder sortedBuilder()
     {
-        return new Builder(columns, true);
+        return new Builder(true);
     }
 
-    public static Row.Builder unsortedBuilder(Columns columns, int nowInSec)
+    public static Row.Builder unsortedBuilder(int nowInSec)
     {
-        return new Builder(columns, false, nowInSec);
+        return new Builder(false, nowInSec);
     }
 
     // This is only used by PartitionUpdate.CounterMark but other uses should be avoided as much as possible as it breaks our general
@@ -521,8 +526,6 @@ public class BTreeRow extends AbstractRow
             }
 
         };
-        protected final Columns columns;
-
         protected Clustering clustering;
         protected LivenessInfo primaryKeyLivenessInfo = LivenessInfo.EMPTY;
         protected DeletionTime deletion = DeletionTime.LIVE;
@@ -534,14 +537,13 @@ public class BTreeRow extends AbstractRow
 
         // For complex column at index i of 'columns', we store at complexDeletions[i] its complex deletion.
 
-        protected Builder(Columns columns, boolean isSorted)
+        protected Builder(boolean isSorted)
         {
-            this(columns, isSorted, Integer.MIN_VALUE);
+            this(isSorted, Integer.MIN_VALUE);
         }
 
-        protected Builder(Columns columns, boolean isSorted, int nowInSecs)
+        protected Builder(boolean isSorted, int nowInSecs)
         {
-            this.columns = columns;
             this.cells = BTree.builder(ColumnData.comparator);
             resolver = new CellResolver(nowInSecs);
             this.isSorted = isSorted;
@@ -605,7 +607,7 @@ public class BTreeRow extends AbstractRow
                 cells.resolve(resolver);
             Object[] btree = cells.build();
             int minDeletionTime = minDeletionTime(btree, primaryKeyLivenessInfo, deletion);
-            Row row = new BTreeRow(clustering, columns, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
+            Row row = new BTreeRow(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
             reset();
             return row;
         }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/Row.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/Row.java b/src/java/org/apache/cassandra/db/rows/Row.java
index 003dd04..1b18b38 100644
--- a/src/java/org/apache/cassandra/db/rows/Row.java
+++ b/src/java/org/apache/cassandra/db/rows/Row.java
@@ -60,7 +60,7 @@ public interface Row extends Unfiltered, Collection<ColumnData>
      *
      * @return a superset of the columns contained in this row.
      */
-    public Columns columns();
+    public Collection<ColumnDefinition> columns();
 
     /**
      * The row deletion.
@@ -163,6 +163,11 @@ public interface Row extends Unfiltered, Collection<ColumnData>
     public boolean hasComplexDeletion();
 
     /**
+     * Whether the row stores any (non-RT) data for any complex column.
+     */
+    boolean hasComplex();
+
+    /**
      * Whether the row has any deletion info (row deletion, cell tombstone, expired cell or complex deletion).
      *
      * @param nowInSec the current time in seconds to decide if a cell is expired.
@@ -312,7 +317,6 @@ public interface Row extends Unfiltered, Collection<ColumnData>
      */
     public static class Merger
     {
-        private final Columns columns;
         private final Row[] rows;
         private final List<Iterator<ColumnData>> columnDataIterators;
 
@@ -323,12 +327,11 @@ public interface Row extends Unfiltered, Collection<ColumnData>
         private final List<ColumnData> dataBuffer = new ArrayList<>();
         private final ColumnDataReducer columnDataReducer;
 
-        public Merger(int size, int nowInSec, Columns columns)
+        public Merger(int size, int nowInSec, boolean hasComplex)
         {
-            this.columns = columns;
             this.rows = new Row[size];
             this.columnDataIterators = new ArrayList<>(size);
-            this.columnDataReducer = new ColumnDataReducer(size, nowInSec, columns.hasComplex());
+            this.columnDataReducer = new ColumnDataReducer(size, nowInSec, hasComplex);
         }
 
         public void clear()
@@ -395,7 +398,7 @@ public interface Row extends Unfiltered, Collection<ColumnData>
             // Because some data might have been shadowed by the 'activeDeletion', we could have an empty row
             return rowInfo.isEmpty() && rowDeletion.isLive() && dataBuffer.isEmpty()
                  ? null
-                 : BTreeRow.create(clustering, columns, rowInfo, rowDeletion, BTree.build(dataBuffer, UpdateFunction.<ColumnData>noOp()));
+                 : BTreeRow.create(clustering, rowInfo, rowDeletion, BTree.build(dataBuffer, UpdateFunction.<ColumnData>noOp()));
         }
 
         public Clustering mergedClustering()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/Rows.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/Rows.java b/src/java/org/apache/cassandra/db/rows/Rows.java
index ce177f2..bf9ed5e 100644
--- a/src/java/org/apache/cassandra/db/rows/Rows.java
+++ b/src/java/org/apache/cassandra/db/rows/Rows.java
@@ -17,9 +17,7 @@
  */
 package org.apache.cassandra.db.rows;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import java.util.*;
 
 import com.google.common.collect.Iterators;
 import com.google.common.collect.PeekingIterator;
@@ -27,27 +25,13 @@ import com.google.common.collect.PeekingIterator;
 import org.apache.cassandra.config.ColumnDefinition;
 import org.apache.cassandra.db.*;
 import org.apache.cassandra.db.partitions.PartitionStatisticsCollector;
-import org.apache.cassandra.utils.SearchIterator;
+import org.apache.cassandra.utils.MergeIterator;
 
 /**
  * Static utilities to work on Row objects.
  */
 public abstract class Rows
 {
-    // TODO: we could have a that in a more generic place...
-    private static final SearchIterator<ColumnDefinition, ColumnData> EMPTY_SEARCH_ITERATOR = new SearchIterator<ColumnDefinition, ColumnData>()
-    {
-        public boolean hasNext()
-        {
-            return false;
-        }
-
-        public ColumnData next(ColumnDefinition column)
-        {
-            return null;
-        }
-    };
-
     private Rows() {}
 
     public static final Row EMPTY_STATIC_ROW = BTreeRow.emptyRow(Clustering.STATIC_CLUSTERING);
@@ -123,12 +107,11 @@ public abstract class Rows
      * each input and {@code merged} to {@code diffListener}.
      *
      * @param merged the result of merging {@code inputs}.
-     * @param columns a superset of all the columns in any of {@code merged}/{@code inputs}.
      * @param inputs the inputs whose merge yielded {@code merged}.
      * @param diffListener the listener to which to signal the differences between the inputs and the merged
      * result.
      */
-    public static void diff(RowDiffListener diffListener, Row merged, Columns columns, Row...inputs)
+    public static void diff(RowDiffListener diffListener, Row merged, Row...inputs)
     {
         Clustering clustering = merged.clustering();
         LivenessInfo mergedInfo = merged.primaryKeyLivenessInfo().isEmpty() ? null : merged.primaryKeyLivenessInfo();
@@ -145,81 +128,96 @@ public abstract class Rows
                 diffListener.onDeletion(i, clustering, mergedDeletion, inputDeletion);
         }
 
-        SearchIterator<ColumnDefinition, ColumnData> mergedIterator = merged.searchIterator();
-        List<SearchIterator<ColumnDefinition, ColumnData>> inputIterators = new ArrayList<>(inputs.length);
-
+        List<Iterator<ColumnData>> inputIterators = new ArrayList<>(1 + inputs.length);
+        inputIterators.add(merged.iterator());
         for (Row row : inputs)
-            inputIterators.add(row == null ? EMPTY_SEARCH_ITERATOR : row.searchIterator());
+            inputIterators.add(row == null ? Collections.emptyIterator() : row.iterator());
 
-        Iterator<ColumnDefinition> simpleColumns = columns.simpleColumns();
-        while (simpleColumns.hasNext())
+        Iterator<?> iter = MergeIterator.get(inputIterators, ColumnData.comparator, new MergeIterator.Reducer<ColumnData, Object>()
         {
-            ColumnDefinition column = simpleColumns.next();
-            Cell mergedCell = (Cell)mergedIterator.next(column);
-            for (int i = 0; i < inputs.length; i++)
+            ColumnData mergedData;
+            ColumnData[] inputDatas = new ColumnData[inputs.length];
+            public void reduce(int idx, ColumnData current)
             {
-                Cell inputCell = (Cell)inputIterators.get(i).next(column);
-                if (mergedCell != null || inputCell != null)
-                    diffListener.onCell(i, clustering, mergedCell, inputCell);
+                if (idx == 0)
+                    mergedData = current;
+                else
+                    inputDatas[idx - 1] = current;
             }
-        }
 
-        Iterator<ColumnDefinition> complexColumns = columns.complexColumns();
-        while (complexColumns.hasNext())
-        {
-            ColumnDefinition column = complexColumns.next();
-            ComplexColumnData mergedData = (ComplexColumnData)mergedIterator.next(column);
-            // Doing one input at a time is not the most efficient, but it's a lot simpler for now
-            for (int i = 0; i < inputs.length; i++)
+            protected Object getReduced()
             {
-                ComplexColumnData inputData = (ComplexColumnData)inputIterators.get(i).next(column);
-                if (mergedData == null)
-                {
-                    if (inputData == null)
-                        continue;
-
-                    // Everything in inputData has been shadowed
-                    if (!inputData.complexDeletion().isLive())
-                        diffListener.onComplexDeletion(i, clustering, column, null, inputData.complexDeletion());
-                    for (Cell inputCell : inputData)
-                        diffListener.onCell(i, clustering, null, inputCell);
-                }
-                else if (inputData == null)
-                {
-                    // Everything in inputData is new
-                    if (!mergedData.complexDeletion().isLive())
-                        diffListener.onComplexDeletion(i, clustering, column, mergedData.complexDeletion(), null);
-                    for (Cell mergedCell : mergedData)
-                        diffListener.onCell(i, clustering, mergedCell, null);
-                }
-                else
+                for (int i = 0 ; i != inputDatas.length ; i++)
                 {
-                    PeekingIterator<Cell> mergedCells = Iterators.peekingIterator(mergedData.iterator());
-                    PeekingIterator<Cell> inputCells = Iterators.peekingIterator(inputData.iterator());
-                    while (mergedCells.hasNext() && inputCells.hasNext())
+                    ColumnData input = inputDatas[i];
+                    if (mergedData != null || input != null)
                     {
-                        int cmp = column.cellPathComparator().compare(mergedCells.peek().path(), inputCells.peek().path());
-                        if (cmp == 0)
-                            diffListener.onCell(i, clustering, mergedCells.next(), inputCells.next());
-                        else if (cmp < 0)
-                            diffListener.onCell(i, clustering, mergedCells.next(), null);
-                        else // cmp > 0
-                            diffListener.onCell(i, clustering, null, inputCells.next());
+                        ColumnDefinition column = (mergedData != null ? mergedData : input).column;
+                        if (column.isSimple())
+                        {
+                            diffListener.onCell(i, clustering, (Cell) mergedData, (Cell) input);
+                        }
+                        else
+                        {
+                            ComplexColumnData mergedData = (ComplexColumnData) this.mergedData;
+                            ComplexColumnData inputData = (ComplexColumnData) input;
+                            if (mergedData == null)
+                            {
+                                // Everything in inputData has been shadowed
+                                if (!inputData.complexDeletion().isLive())
+                                    diffListener.onComplexDeletion(i, clustering, column, null, inputData.complexDeletion());
+                                for (Cell inputCell : inputData)
+                                    diffListener.onCell(i, clustering, null, inputCell);
+                            }
+                            else if (inputData == null)
+                            {
+                                // Everything in mergedData is new
+                                if (!mergedData.complexDeletion().isLive())
+                                    diffListener.onComplexDeletion(i, clustering, column, mergedData.complexDeletion(), null);
+                                for (Cell mergedCell : mergedData)
+                                    diffListener.onCell(i, clustering, mergedCell, null);
+                            }
+                            else
+                            {
+                                PeekingIterator<Cell> mergedCells = Iterators.peekingIterator(mergedData.iterator());
+                                PeekingIterator<Cell> inputCells = Iterators.peekingIterator(inputData.iterator());
+                                while (mergedCells.hasNext() && inputCells.hasNext())
+                                {
+                                    int cmp = column.cellPathComparator().compare(mergedCells.peek().path(), inputCells.peek().path());
+                                    if (cmp == 0)
+                                        diffListener.onCell(i, clustering, mergedCells.next(), inputCells.next());
+                                    else if (cmp < 0)
+                                        diffListener.onCell(i, clustering, mergedCells.next(), null);
+                                    else // cmp > 0
+                                        diffListener.onCell(i, clustering, null, inputCells.next());
+                                }
+                                while (mergedCells.hasNext())
+                                    diffListener.onCell(i, clustering, mergedCells.next(), null);
+                                while (inputCells.hasNext())
+                                    diffListener.onCell(i, clustering, null, inputCells.next());
+                            }
+                        }
                     }
-                    while (mergedCells.hasNext())
-                        diffListener.onCell(i, clustering, mergedCells.next(), null);
-                    while (inputCells.hasNext())
-                        diffListener.onCell(i, clustering, null, inputCells.next());
+
                 }
+                return null;
             }
-        }
+
+            protected void onKeyChange()
+            {
+                mergedData = null;
+                Arrays.fill(inputDatas, null);
+            }
+        });
+
+        while (iter.hasNext())
+            iter.next();
     }
 
     public static Row merge(Row row1, Row row2, int nowInSec)
     {
-        Columns mergedColumns = row1.columns().mergeTo(row2.columns());
-        Row.Builder builder = BTreeRow.sortedBuilder(mergedColumns);
-        merge(row1, row2, mergedColumns, builder, nowInSec);
+        Row.Builder builder = BTreeRow.sortedBuilder();
+        merge(row1, row2, builder, nowInSec);
         return builder.build();
     }
 
@@ -227,7 +225,6 @@ public abstract class Rows
     // Return the minimum timestamp delta between existing and update
     public static long merge(Row existing,
                              Row update,
-                             Columns mergedColumns,
                              Row.Builder builder,
                              int nowInSec)
     {
@@ -248,37 +245,42 @@ public abstract class Rows
         builder.addPrimaryKeyLivenessInfo(mergedInfo);
         builder.addRowDeletion(deletion);
 
-        for (int i = 0; i < mergedColumns.simpleColumnCount(); i++)
+        Iterator<ColumnData> a = existing.iterator();
+        Iterator<ColumnData> b = update.iterator();
+        ColumnData nexta = a.hasNext() ? a.next() : null, nextb = b.hasNext() ? b.next() : null;
+        while (nexta != null | nextb != null)
         {
-            ColumnDefinition c = mergedColumns.getSimple(i);
-            Cell existingCell = existing.getCell(c);
-            Cell updateCell = update.getCell(c);
-            timeDelta = Math.min(timeDelta, Cells.reconcile(existingCell,
-                                                            updateCell,
-                                                            deletion,
-                                                            builder,
-                                                            nowInSec));
-        }
+            int comparison = nexta == null ? 1 : nextb == null ? -1 : nexta.column.compareTo(nextb.column);
+            ColumnData cura = comparison <= 0 ? nexta : null;
+            ColumnData curb = comparison >= 0 ? nextb : null;
+            ColumnDefinition column = (cura != null ? cura : curb).column;
+            if (column.isSimple())
+            {
+                timeDelta = Math.min(timeDelta, Cells.reconcile((Cell) cura, (Cell) curb, deletion, builder, nowInSec));
+            }
+            else
+            {
+                ComplexColumnData existingData = (ComplexColumnData) cura;
+                ComplexColumnData updateData = (ComplexColumnData) curb;
 
-        for (int i = 0; i < mergedColumns.complexColumnCount(); i++)
-        {
-            ColumnDefinition c = mergedColumns.getComplex(i);
-            ComplexColumnData existingData = existing.getComplexColumnData(c);
-            ComplexColumnData updateData = update.getComplexColumnData(c);
+                DeletionTime existingDt = existingData == null ? DeletionTime.LIVE : existingData.complexDeletion();
+                DeletionTime updateDt = updateData == null ? DeletionTime.LIVE : updateData.complexDeletion();
+                DeletionTime maxDt = existingDt.supersedes(updateDt) ? existingDt : updateDt;
+                if (maxDt.supersedes(deletion))
+                    builder.addComplexDeletion(column, maxDt);
+                else
+                    maxDt = deletion;
 
-            DeletionTime existingDt = existingData == null ? DeletionTime.LIVE : existingData.complexDeletion();
-            DeletionTime updateDt = updateData == null ? DeletionTime.LIVE : updateData.complexDeletion();
-            DeletionTime maxDt = existingDt.supersedes(updateDt) ? existingDt : updateDt;
-            if (maxDt.supersedes(deletion))
-                builder.addComplexDeletion(c, maxDt);
-            else
-                maxDt = deletion;
+                Iterator<Cell> existingCells = existingData == null ? null : existingData.iterator();
+                Iterator<Cell> updateCells = updateData == null ? null : updateData.iterator();
+                timeDelta = Math.min(timeDelta, Cells.reconcileComplex(column, existingCells, updateCells, maxDt, builder, nowInSec));
+            }
 
-            Iterator<Cell> existingCells = existingData == null ? null : existingData.iterator();
-            Iterator<Cell> updateCells = updateData == null ? null : updateData.iterator();
-            timeDelta = Math.min(timeDelta, Cells.reconcileComplex(c, existingCells, updateCells, maxDt, builder, nowInSec));
+            if (cura != null)
+                nexta = a.hasNext() ? a.next() : null;
+            if (curb != null)
+                nextb = b.hasNext() ? b.next() : null;
         }
-
         return timeDelta;
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
index 2102534..e1b2c09 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorSerializer.java
@@ -204,7 +204,7 @@ public class UnfilteredRowIteratorSerializer
         final SerializationHeader sHeader = header.sHeader;
         return new AbstractUnfilteredRowIterator(metadata, header.key, header.partitionDeletion, sHeader.columns(), header.staticRow, header.isReversed, sHeader.stats())
         {
-            private final Row.Builder builder = BTreeRow.sortedBuilder(sHeader.columns().regulars);
+            private final Row.Builder builder = BTreeRow.sortedBuilder();
 
             protected Unfiltered computeNext()
             {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
index 477eac9..e251670 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIterators.java
@@ -48,7 +48,7 @@ public abstract class UnfilteredRowIterators
     {
         public void onMergedPartitionLevelDeletion(DeletionTime mergedDeletion, DeletionTime[] versions);
 
-        public void onMergedRows(Row merged, Columns columns, Row[] versions);
+        public void onMergedRows(Row merged, Row[] versions);
         public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTombstoneMarker[] versions);
 
         public void close();
@@ -100,7 +100,7 @@ public abstract class UnfilteredRowIterators
     public static UnfilteredRowIterator noRowsIterator(final CFMetaData cfm, final DecoratedKey partitionKey, final Row staticRow, final DeletionTime partitionDeletion, final boolean isReverseOrder)
     {
         PartitionColumns columns = staticRow == null ? PartitionColumns.NONE
-                                                     : new PartitionColumns(staticRow.columns(), Columns.NONE);
+                                                     : new PartitionColumns(Columns.from(staticRow.columns()), Columns.NONE);
         return new UnfilteredRowIterator()
         {
             public CFMetaData metadata()
@@ -241,7 +241,7 @@ public abstract class UnfilteredRowIterators
             @Override
             protected Row computeNextStatic(Row row)
             {
-                Row.Builder staticBuilder = allocator.cloningBTreeRowBuilder(columns().statics);
+                Row.Builder staticBuilder = allocator.cloningBTreeRowBuilder();
                 return Rows.copy(row, staticBuilder).build();
             }
 
@@ -249,7 +249,7 @@ public abstract class UnfilteredRowIterators
             protected Row computeNext(Row row)
             {
                 if (regularBuilder == null)
-                    regularBuilder = allocator.cloningBTreeRowBuilder(columns().regulars);
+                    regularBuilder = allocator.cloningBTreeRowBuilder();
 
                 return Rows.copy(row, regularBuilder).build();
             }
@@ -377,7 +377,7 @@ public abstract class UnfilteredRowIterators
                   iterators.get(0).partitionKey(),
                   partitionDeletion,
                   columns,
-                  mergeStaticRows(metadata, iterators, columns.statics, nowInSec, listener, partitionDeletion),
+                  mergeStaticRows(iterators, columns.statics, nowInSec, listener, partitionDeletion),
                   reversed,
                   mergeStats(iterators));
 
@@ -450,8 +450,7 @@ public abstract class UnfilteredRowIterators
             return delTime;
         }
 
-        private static Row mergeStaticRows(CFMetaData metadata,
-                                           List<UnfilteredRowIterator> iterators,
+        private static Row mergeStaticRows(List<UnfilteredRowIterator> iterators,
                                            Columns columns,
                                            int nowInSec,
                                            MergeListener listener,
@@ -463,7 +462,7 @@ public abstract class UnfilteredRowIterators
             if (iterators.stream().allMatch(iter -> iter.staticRow().isEmpty()))
                 return Rows.EMPTY_STATIC_ROW;
 
-            Row.Merger merger = new Row.Merger(iterators.size(), nowInSec, columns);
+            Row.Merger merger = new Row.Merger(iterators.size(), nowInSec, columns.hasComplex());
             for (int i = 0; i < iterators.size(); i++)
                 merger.add(i, iterators.get(i).staticRow());
 
@@ -471,7 +470,7 @@ public abstract class UnfilteredRowIterators
             if (merged == null)
                 merged = Rows.EMPTY_STATIC_ROW;
             if (listener != null)
-                listener.onMergedRows(merged, columns, merger.mergedRows());
+                listener.onMergedRows(merged, merger.mergedRows());
             return merged;
         }
 
@@ -530,7 +529,7 @@ public abstract class UnfilteredRowIterators
 
             private MergeReducer(int size, boolean reversed, int nowInSec, MergeListener listener)
             {
-                this.rowMerger = new Row.Merger(size, nowInSec, columns().regulars);
+                this.rowMerger = new Row.Merger(size, nowInSec, columns().regulars.hasComplex());
                 this.markerMerger = new RangeTombstoneMarker.Merger(size, partitionLevelDeletion(), reversed);
                 this.listener = listener;
             }
@@ -557,7 +556,7 @@ public abstract class UnfilteredRowIterators
                 {
                     Row merged = rowMerger.merge(markerMerger.activeDeletion());
                     if (listener != null)
-                        listener.onMergedRows(merged == null ? BTreeRow.emptyRow(rowMerger.mergedClustering()) : merged, columns().regulars, rowMerger.mergedRows());
+                        listener.onMergedRows(merged == null ? BTreeRow.emptyRow(rowMerger.mergedClustering()) : merged, rowMerger.mergedRows());
                     return merged;
                 }
                 else

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
index 60bd06f..605a67b 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
@@ -287,7 +287,7 @@ public class UnfilteredSerializer
     {
         int flags = in.readUnsignedByte();
         assert !isEndOfPartition(flags) && kind(flags) == Unfiltered.Kind.ROW && isStatic(flags) : flags;
-        Row.Builder builder = BTreeRow.sortedBuilder(helper.fetchedStaticColumns(header));
+        Row.Builder builder = BTreeRow.sortedBuilder();
         builder.newRow(Clustering.STATIC_CLUSTERING);
         return deserializeRowBody(in, header, helper, flags, builder);
     }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/db/view/MaterializedView.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/view/MaterializedView.java b/src/java/org/apache/cassandra/db/view/MaterializedView.java
index 0f6cf06..5128b7c 100644
--- a/src/java/org/apache/cassandra/db/view/MaterializedView.java
+++ b/src/java/org/apache/cassandra/db/view/MaterializedView.java
@@ -268,7 +268,7 @@ public class MaterializedView
                                             int nowInSec)
     {
         CFMetaData viewCfm = getViewCfs().metadata;
-        Row.Builder builder = BTreeRow.unsortedBuilder(viewCfm.partitionColumns().regulars, nowInSec);
+        Row.Builder builder = BTreeRow.unsortedBuilder(nowInSec);
         builder.newRow(viewClustering(temporalRow, resolver));
         builder.addRowDeletion(deletionTime);
         return PartitionUpdate.singleRowUpdate(viewCfm, partitionKey, builder.build());
@@ -286,7 +286,7 @@ public class MaterializedView
     {
 
         CFMetaData viewCfm = getViewCfs().metadata;
-        Row.Builder builder = BTreeRow.unsortedBuilder(viewCfm.partitionColumns().regulars, nowInSec);
+        Row.Builder builder = BTreeRow.unsortedBuilder(nowInSec);
         builder.newRow(viewClustering(temporalRow, resolver));
         builder.addComplexDeletion(deletedColumn, deletionTime);
         return PartitionUpdate.singleRowUpdate(viewCfm, partitionKey, builder.build());
@@ -363,7 +363,7 @@ public class MaterializedView
             return null;
         }
 
-        Row.Builder regularBuilder = BTreeRow.unsortedBuilder(viewCfs.metadata.partitionColumns().regulars, temporalRow.nowInSec);
+        Row.Builder regularBuilder = BTreeRow.unsortedBuilder(temporalRow.nowInSec);
 
         CBuilder clustering = CBuilder.create(viewCfs.getComparator());
         for (int i = 0; i < viewCfs.metadata.clusteringColumns().size(); i++)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/SecondaryIndexManager.java b/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
index 6f30305..fabfebc 100644
--- a/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
+++ b/src/java/org/apache/cassandra/index/SecondaryIndexManager.java
@@ -740,9 +740,9 @@ public class SecondaryIndexManager implements IndexRegistry
 
         public void onUpdated(Row existing, Row updated)
         {
-            final Row.Builder toRemove = BTreeRow.sortedBuilder(existing.columns());
+            final Row.Builder toRemove = BTreeRow.sortedBuilder();
             toRemove.newRow(existing.clustering());
-            final Row.Builder toInsert = BTreeRow.sortedBuilder(updated.columns());
+            final Row.Builder toInsert = BTreeRow.sortedBuilder();
             toInsert.newRow(updated.clustering());
             // diff listener collates the columns to be added & removed from the indexes
             RowDiffListener diffListener = new RowDiffListener()
@@ -771,7 +771,7 @@ public class SecondaryIndexManager implements IndexRegistry
 
                 }
             };
-            Rows.diff(diffListener, updated, updated.columns().mergeTo(existing.columns()), existing);
+            Rows.diff(diffListener, updated, existing);
             Row oldRow = toRemove.build();
             Row newRow = toInsert.build();
             for (Index.Indexer indexer : indexers)
@@ -834,7 +834,7 @@ public class SecondaryIndexManager implements IndexRegistry
                 rows = new Row[versions];
         }
 
-        public void onRowMerge(Columns columns, Row merged, Row...versions)
+        public void onRowMerge(Row merged, Row...versions)
         {
             // Diff listener constructs rows representing deltas between the merged and original versions
             // These delta rows are then passed to registered indexes for removal processing
@@ -859,7 +859,7 @@ public class SecondaryIndexManager implements IndexRegistry
                     {
                         if (builders[i] == null)
                         {
-                            builders[i] = BTreeRow.sortedBuilder(columns);
+                            builders[i] = BTreeRow.sortedBuilder();
                             builders[i].newRow(clustering);
                         }
                         builders[i].addCell(original);
@@ -867,7 +867,7 @@ public class SecondaryIndexManager implements IndexRegistry
                 }
             };
 
-            Rows.diff(diffListener, merged, columns, versions);
+            Rows.diff(diffListener, merged, versions);
 
             for(int i = 0; i < builders.length; i++)
                 if (builders[i] != null)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java b/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
index a9fbf41..f2436af 100644
--- a/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
+++ b/src/java/org/apache/cassandra/index/transactions/CompactionTransaction.java
@@ -18,7 +18,6 @@
 
 package org.apache.cassandra.index.transactions;
 
-import org.apache.cassandra.db.Columns;
 import org.apache.cassandra.db.rows.Row;
 
 /**
@@ -33,12 +32,12 @@ import org.apache.cassandra.db.rows.Row;
  */
 public interface CompactionTransaction extends IndexTransaction
 {
-    void onRowMerge(Columns columns, Row merged, Row...versions);
+    void onRowMerge(Row merged, Row...versions);
 
     CompactionTransaction NO_OP = new CompactionTransaction()
     {
         public void start(){}
-        public void onRowMerge(Columns columns, Row merged, Row...versions){}
+        public void onRowMerge(Row merged, Row...versions){}
         public void commit(){}
     };
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
index 7382e5e..365d469 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableSimpleIterator.java
@@ -71,7 +71,7 @@ public abstract class SSTableSimpleIterator extends AbstractIterator<Unfiltered>
         {
             super(metadata, in, helper);
             this.header = header;
-            this.builder = BTreeRow.sortedBuilder(helper.fetchedRegularColumns(header));
+            this.builder = BTreeRow.sortedBuilder();
         }
 
         public Row readStaticRow() throws IOException

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/service/DataResolver.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/DataResolver.java b/src/java/org/apache/cassandra/service/DataResolver.java
index cd69ef3..0eb13c3 100644
--- a/src/java/org/apache/cassandra/service/DataResolver.java
+++ b/src/java/org/apache/cassandra/service/DataResolver.java
@@ -212,7 +212,7 @@ public class DataResolver extends ResponseResolver
             {
                 if (currentRows[i] == null)
                 {
-                    currentRows[i] = BTreeRow.sortedBuilder(clustering == Clustering.STATIC_CLUSTERING ? columns.statics : columns.regulars);
+                    currentRows[i] = BTreeRow.sortedBuilder();
                     currentRows[i].newRow(clustering);
                 }
                 return currentRows[i];
@@ -227,7 +227,7 @@ public class DataResolver extends ResponseResolver
                 }
             }
 
-            public void onMergedRows(Row merged, Columns columns, Row[] versions)
+            public void onMergedRows(Row merged, Row[] versions)
             {
                 // If a row was shadowed post merged, it must be by a partition level or range tombstone, and we handle
                 // those case directly in their respective methods (in other words, it would be inefficient to send a row
@@ -235,7 +235,7 @@ public class DataResolver extends ResponseResolver
                 if (merged.isEmpty())
                     return;
 
-                Rows.diff(diffListener, merged, columns, versions);
+                Rows.diff(diffListener, merged, versions);
                 for (int i = 0; i < currentRows.length; i++)
                 {
                     if (currentRows[i] != null)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java b/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
index 084e835..72e4399 100644
--- a/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
+++ b/src/java/org/apache/cassandra/thrift/ThriftResultsMerger.java
@@ -112,14 +112,14 @@ public class ThriftResultsMerger extends WrappingUnfilteredPartitionIterator
             super(results);
             assert results.metadata().isStaticCompactTable();
             this.nowInSec = nowInSec;
-            this.builder = BTreeRow.sortedBuilder(results.columns().regulars);
+            this.builder = BTreeRow.sortedBuilder();
         }
 
         private void init()
         {
             assert !isInit;
             Row staticRow = super.staticRow();
-            assert staticRow.columns().complexColumnCount() == 0;
+            assert !staticRow.hasComplex();
 
             staticCells = staticRow.cells().iterator();
             updateNextToMerge();
@@ -220,7 +220,7 @@ public class ThriftResultsMerger extends WrappingUnfilteredPartitionIterator
             this.superColumnMapColumn = results.metadata().compactValueColumn();
             assert superColumnMapColumn != null && superColumnMapColumn.type instanceof MapType;
 
-            this.builder = BTreeRow.sortedBuilder(Columns.of(superColumnMapColumn));
+            this.builder = BTreeRow.sortedBuilder();
             this.columnComparator = ((MapType)superColumnMapColumn.type).nameComparator();
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java b/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
index a76732f..9066335 100644
--- a/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/AbstractAllocator.java
@@ -46,18 +46,18 @@ public abstract class AbstractAllocator
 
     public abstract ByteBuffer allocate(int size);
 
-    public Row.Builder cloningBTreeRowBuilder(Columns columns)
+    public Row.Builder cloningBTreeRowBuilder()
     {
-        return new CloningBTreeRowBuilder(columns, this);
+        return new CloningBTreeRowBuilder(this);
     }
 
     private static class CloningBTreeRowBuilder extends BTreeRow.Builder
     {
         private final AbstractAllocator allocator;
 
-        private CloningBTreeRowBuilder(Columns columns, AbstractAllocator allocator)
+        private CloningBTreeRowBuilder(AbstractAllocator allocator)
         {
-            super(columns, true);
+            super(true);
             this.allocator = allocator;
         }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java b/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
index 15499ae..588b433 100644
--- a/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
@@ -59,7 +59,7 @@ public abstract class MemtableAllocator
         this.offHeap = offHeap;
     }
 
-    public abstract Row.Builder rowBuilder(CFMetaData metadata, OpOrder.Group opGroup, boolean isStatic);
+    public abstract Row.Builder rowBuilder(OpOrder.Group opGroup);
     public abstract DecoratedKey clone(DecoratedKey key, OpOrder.Group opGroup);
     public abstract DataReclaimer reclaimer();
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java b/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
index 8205f3b..fb35b38 100644
--- a/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/MemtableBufferAllocator.java
@@ -31,10 +31,9 @@ public abstract class MemtableBufferAllocator extends MemtableAllocator
         super(onHeap, offHeap);
     }
 
-    public Row.Builder rowBuilder(CFMetaData metadata, OpOrder.Group writeOp, boolean isStatic)
+    public Row.Builder rowBuilder(OpOrder.Group writeOp)
     {
-        Columns columns = isStatic ? metadata.partitionColumns().statics : metadata.partitionColumns().regulars;
-        return allocator(writeOp).cloningBTreeRowBuilder(columns);
+        return allocator(writeOp).cloningBTreeRowBuilder();
     }
 
     public DecoratedKey clone(DecoratedKey key, OpOrder.Group writeOp)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java b/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
index 7b95430..e5458b4 100644
--- a/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/NativeAllocator.java
@@ -53,7 +53,7 @@ public class NativeAllocator extends MemtableAllocator
         super(pool.onHeap.newAllocator(), pool.offHeap.newAllocator());
     }
 
-    public Row.Builder rowBuilder(CFMetaData metadata, OpOrder.Group opGroup, boolean isStatic)
+    public Row.Builder rowBuilder(OpOrder.Group opGroup)
     {
         // TODO
         throw new UnsupportedOperationException();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/test/unit/org/apache/cassandra/db/RowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/RowTest.java b/test/unit/org/apache/cassandra/db/RowTest.java
index cd80a2f..0af183c 100644
--- a/test/unit/org/apache/cassandra/db/RowTest.java
+++ b/test/unit/org/apache/cassandra/db/RowTest.java
@@ -127,7 +127,7 @@ public class RowTest
         ColumnDefinition defA = cfm.getColumnDefinition(new ColumnIdentifier("a", true));
         ColumnDefinition defB = cfm.getColumnDefinition(new ColumnIdentifier("b", true));
 
-        Row.Builder builder = BTreeRow.unsortedBuilder(cfm.partitionColumns().regulars, nowInSeconds);
+        Row.Builder builder = BTreeRow.unsortedBuilder(nowInSeconds);
         builder.newRow(cfm.comparator.make("c1"));
         writeSimpleCellValue(builder, cfm, defA, "a1", 0);
         writeSimpleCellValue(builder, cfm, defA, "a2", 1);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/aa576263/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java b/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
index 09a337a..44391c8 100644
--- a/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
+++ b/test/unit/org/apache/cassandra/triggers/TriggerExecutorTest.java
@@ -268,7 +268,7 @@ public class TriggerExecutorTest
 
     private static PartitionUpdate makeCf(CFMetaData metadata, String key, String columnValue1, String columnValue2)
     {
-        Row.Builder builder = BTreeRow.unsortedBuilder(metadata.partitionColumns().regulars, FBUtilities.nowInSeconds());
+        Row.Builder builder = BTreeRow.unsortedBuilder(FBUtilities.nowInSeconds());
         builder.newRow(Clustering.EMPTY);
         long ts = FBUtilities.timestampMicros();
         if (columnValue1 != null)


[3/3] cassandra git commit: Merge branch 'cassandra-3.0' into trunk

Posted by be...@apache.org.
Merge branch 'cassandra-3.0' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ae89731f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ae89731f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ae89731f

Branch: refs/heads/trunk
Commit: ae89731f07020f3ce921fbf96a5adb424ac4f9c5
Parents: 356dd5b aa57626
Author: Benedict Elliott Smith <be...@apache.org>
Authored: Fri Sep 4 13:47:22 2015 +0100
Committer: Benedict Elliott Smith <be...@apache.org>
Committed: Fri Sep 4 13:47:22 2015 +0100

----------------------------------------------------------------------
 .../apache/cassandra/cql3/UpdateParameters.java |   4 +-
 src/java/org/apache/cassandra/db/Columns.java   |   2 +-
 .../org/apache/cassandra/db/LegacyLayout.java   |   4 +-
 .../apache/cassandra/db/RowUpdateBuilder.java   |   6 +-
 .../cassandra/db/SerializationHeader.java       |   1 -
 .../cassandra/db/UnfilteredDeserializer.java    |   2 +-
 .../db/compaction/CompactionIterator.java       |   4 +-
 .../db/partitions/AtomicBTreePartition.java     |   8 +-
 .../db/partitions/PartitionUpdate.java          |  10 +-
 .../apache/cassandra/db/rows/AbstractRow.java   |   1 -
 .../org/apache/cassandra/db/rows/BTreeRow.java  |  64 +++---
 src/java/org/apache/cassandra/db/rows/Row.java  |  15 +-
 src/java/org/apache/cassandra/db/rows/Rows.java | 214 ++++++++++---------
 .../rows/UnfilteredRowIteratorSerializer.java   |   2 +-
 .../db/rows/UnfilteredRowIterators.java         |  21 +-
 .../cassandra/db/rows/UnfilteredSerializer.java |   2 +-
 .../cassandra/db/view/MaterializedView.java     |   6 +-
 .../cassandra/index/SecondaryIndexManager.java  |  12 +-
 .../transactions/CompactionTransaction.java     |   5 +-
 .../io/sstable/SSTableSimpleIterator.java       |   2 +-
 .../apache/cassandra/service/DataResolver.java  |   6 +-
 .../cassandra/thrift/ThriftResultsMerger.java   |   6 +-
 .../utils/memory/AbstractAllocator.java         |   8 +-
 .../utils/memory/MemtableAllocator.java         |   2 +-
 .../utils/memory/MemtableBufferAllocator.java   |   5 +-
 .../cassandra/utils/memory/NativeAllocator.java |   2 +-
 test/unit/org/apache/cassandra/db/RowTest.java  |   2 +-
 .../cassandra/triggers/TriggerExecutorTest.java |   2 +-
 28 files changed, 210 insertions(+), 208 deletions(-)
----------------------------------------------------------------------