Posted to commits@cassandra.apache.org by bl...@apache.org on 2017/03/10 09:17:09 UTC
[05/10] cassandra git commit: Merge branch cassandra-2.2 into cassandra-3.0
Merge branch cassandra-2.2 into cassandra-3.0
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/aeca1d2b
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/aeca1d2b
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/aeca1d2b
Branch: refs/heads/cassandra-3.11
Commit: aeca1d2bd8e395a2897c3e36224f49b586babd4e
Parents: 31dec3d 5ef8a8b
Author: Benjamin Lerer <b....@gmail.com>
Authored: Fri Mar 10 10:01:01 2017 +0100
Committer: Benjamin Lerer <b....@gmail.com>
Committed: Fri Mar 10 10:02:21 2017 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/cql3/UpdateParameters.java | 24 ++++-
.../org/apache/cassandra/db/rows/BTreeRow.java | 43 ++++++--
src/java/org/apache/cassandra/db/rows/Row.java | 6 ++
.../org/apache/cassandra/utils/btree/BTree.java | 19 ++++
.../validation/entities/CollectionsTest.java | 100 +++++++++++++++++++
.../apache/cassandra/db/rows/RowBuilder.java | 7 ++
7 files changed, 191 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/aeca1d2b/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 1876922,09e4039..52a794b
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,20 -1,6 +1,21 @@@
-2.2.10
+3.0.13
+ * Slice.isEmpty() returns false for some empty slices (CASSANDRA-13305)
+ * Add formatted row output to assertEmpty in CQL Tester (CASSANDRA-13238)
+Merged from 2.2:
+ * Fix queries updating the same list multiple times (CASSANDRA-13130)
* Fix GRANT/REVOKE when keyspace isn't specified (CASSANDRA-13053)
+
+
+3.0.12
+ * Prevent data loss on upgrade 2.1 - 3.0 by adding component separator to LogRecord absolute path (CASSANDRA-13294)
+ * Improve testing on macOS by eliminating sigar logging (CASSANDRA-13233)
+ * Cqlsh copy-from should error out when csv contains invalid data for collections (CASSANDRA-13071)
+ * Update c.yaml doc for offheap memtables (CASSANDRA-13179)
+ * Faster StreamingHistogram (CASSANDRA-13038)
+ * Legacy deserializer can create unexpected boundary range tombstones (CASSANDRA-13237)
+ * Remove unnecessary assertion from AntiCompactionTest (CASSANDRA-13070)
+ * Fix cqlsh COPY for dates before 1900 (CASSANDRA-13185)
+Merged from 2.2:
* Avoid race on receiver by starting streaming sender thread after sending init message (CASSANDRA-12886)
* Fix "multiple versions of ant detected..." when running ant test (CASSANDRA-13232)
* Coalescing strategy sleeps too much (CASSANDRA-13090)
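
For context on the CASSANDRA-13130 entry above: the bug appeared when a single
statement set several elements of the same list, because the second assignment
could not see the first as a pending mutation. A minimal CQLTester-style sketch
of the affected pattern (table layout and values are hypothetical; the real
coverage lives in the CollectionsTest changes in this commit):

    @Test
    public void testMultipleUpdatesToSameList() throws Throwable
    {
        createTable("CREATE TABLE %s (k int PRIMARY KEY, l list<int>)");
        execute("INSERT INTO %s (k, l) VALUES (0, [1, 2, 3])");
        // Both assignments target the same list in one statement; before the
        // fix, one of the two element updates could be lost.
        execute("UPDATE %s SET l[0] = 10, l[1] = 20 WHERE k = 0");
        assertRows(execute("SELECT l FROM %s WHERE k = 0"), row(list(10, 20, 3)));
    }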
http://git-wip-us.apache.org/repos/asf/cassandra/blob/aeca1d2b/src/java/org/apache/cassandra/cql3/UpdateParameters.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/cql3/UpdateParameters.java
index 0c58097,65edef7..d902dec
--- a/src/java/org/apache/cassandra/cql3/UpdateParameters.java
+++ b/src/java/org/apache/cassandra/cql3/UpdateParameters.java
@@@ -80,134 -59,71 +80,156 @@@ public class UpdateParameter
throw new InvalidRequestException(String.format("Out of bound timestamp, must be in [%d, %d]", Long.MIN_VALUE + 1, Long.MAX_VALUE));
}
- public Cell makeColumn(CellName name, ByteBuffer value) throws InvalidRequestException
+ public void newRow(Clustering clustering) throws InvalidRequestException
+ {
+ if (metadata.isDense() && !metadata.isCompound())
+ {
+ // If it's a COMPACT STORAGE table with a single clustering column, the clustering value is
+ // translated in Thrift to the full Thrift column name, and for backward compatibility we
+ // don't want to allow that to be empty (even though this would be fine for the storage engine).
+ assert clustering.size() == 1;
+ ByteBuffer value = clustering.get(0);
+ if (value == null || !value.hasRemaining())
+ throw new InvalidRequestException("Invalid empty or null value for column " + metadata.clusteringColumns().get(0).name);
+ }
+
+ if (clustering == Clustering.STATIC_CLUSTERING)
+ {
+ if (staticBuilder == null)
+ staticBuilder = BTreeRow.unsortedBuilder(nowInSec);
+ builder = staticBuilder;
+ }
+ else
+ {
+ if (regularBuilder == null)
+ regularBuilder = BTreeRow.unsortedBuilder(nowInSec);
+ builder = regularBuilder;
+ }
+
+ builder.newRow(clustering);
+ }
+
+ public Clustering currentClustering()
+ {
+ return builder.clustering();
+ }
+
+ public void addPrimaryKeyLivenessInfo()
+ {
+ builder.addPrimaryKeyLivenessInfo(LivenessInfo.create(metadata, timestamp, ttl, nowInSec));
+ }
+
+ public void addRowDeletion()
+ {
+ // For compact tables, with the exclusion of the static row (of static compact tables), each row only ever has a single column,
+ // the "compact" one. As such, deleting the row and deleting that single cell are equivalent. We favor the latter however
+ // because that makes it easier when translating back to the old format layout (for thrift and pre-3.0 backward
+ // compatibility) as we don't have to special-case the row deletion. This is also in line with what we used to do pre-3.0.
+ if (metadata.isCompactTable() && builder.clustering() != Clustering.STATIC_CLUSTERING)
+ addTombstone(metadata.compactValueColumn());
+ else
+ builder.addRowDeletion(Row.Deletion.regular(deletionTime));
+ }
+
+ public void addTombstone(ColumnDefinition column) throws InvalidRequestException
{
- QueryProcessor.validateCellName(name, metadata.comparator);
- return AbstractCell.create(name, value, timestamp, ttl, metadata);
+ addTombstone(column, null);
}
- public Cell makeCounter(CellName name, long delta) throws InvalidRequestException
- {
- QueryProcessor.validateCellName(name, metadata.comparator);
- return new BufferCounterUpdateCell(name, delta, FBUtilities.timestampMicros());
- }
+ public void addTombstone(ColumnDefinition column, CellPath path) throws InvalidRequestException
+ {
+ builder.addCell(BufferCell.tombstone(column, timestamp, nowInSec, path));
+ }
+
+ public void addCell(ColumnDefinition column, ByteBuffer value) throws InvalidRequestException
+ {
+ addCell(column, null, value);
+ }
+
+ public void addCell(ColumnDefinition column, CellPath path, ByteBuffer value) throws InvalidRequestException
+ {
+ Cell cell = ttl == LivenessInfo.NO_TTL
+ ? BufferCell.live(metadata, column, timestamp, value, path)
+ : BufferCell.expiring(column, timestamp, ttl, nowInSec, value, path);
+ builder.addCell(cell);
+ }
+
+ public void addCounter(ColumnDefinition column, long increment) throws InvalidRequestException
+ {
+ assert ttl == LivenessInfo.NO_TTL;
+
+ // Because the column is a counter, we need the value to be a CounterContext. However, we're only creating a
+ // "counter update", which is a temporary state until we run into 'CounterMutation.updateWithCurrentValue()'
+ // which does the read-before-write and sets the proper CounterId, clock and updated value.
+ //
+ // We thus create a "fake" local shard here. The CounterId/clock used don't matter as this is just a temporary
+ // state that will be replaced when processing the mutation in CounterMutation, but the reason we use a 'local'
+ // shard is due to the merging rules: if a user includes multiple updates to the same counter in a batch, those
+ // multiple updates will be merged in the PartitionUpdate *before* they even reach CounterMutation. So we need
+ // such updates to be added together, and that's what a local shard gives us.
+ builder.addCell(BufferCell.live(metadata, column, timestamp, CounterContext.instance().createLocal(increment)));
+ }
- public Cell makeTombstone(CellName name) throws InvalidRequestException
+ public void setComplexDeletionTime(ColumnDefinition column)
{
- QueryProcessor.validateCellName(name, metadata.comparator);
- return new BufferDeletedCell(name, localDeletionTime, timestamp);
+ builder.addComplexDeletion(column, deletionTime);
}
- public RangeTombstone makeRangeTombstone(ColumnSlice slice) throws InvalidRequestException
+ public void setComplexDeletionTimeForOverwrite(ColumnDefinition column)
{
- QueryProcessor.validateComposite(slice.start, metadata.comparator);
- QueryProcessor.validateComposite(slice.finish, metadata.comparator);
- return new RangeTombstone(slice.start, slice.finish, timestamp, localDeletionTime);
+ builder.addComplexDeletion(column, new DeletionTime(deletionTime.markedForDeleteAt() - 1, deletionTime.localDeletionTime()));
}
- public RangeTombstone makeTombstoneForOverwrite(ColumnSlice slice) throws InvalidRequestException
+ public Row buildRow()
{
- QueryProcessor.validateComposite(slice.start, metadata.comparator);
- QueryProcessor.validateComposite(slice.finish, metadata.comparator);
- return new RangeTombstone(slice.start, slice.finish, timestamp - 1, localDeletionTime);
+ Row built = builder.build();
+ builder = null; // Resetting to null so we quickly catch bad usage where we forget to call newRow() after that.
+ return built;
+ }
+
+ public DeletionTime deletionTime()
+ {
+ return deletionTime;
+ }
+
+ public RangeTombstone makeRangeTombstone(ClusteringComparator comparator, Clustering clustering)
+ {
+ return makeRangeTombstone(Slice.make(comparator, clustering));
+ }
+
+ public RangeTombstone makeRangeTombstone(Slice slice)
+ {
+ return new RangeTombstone(slice, deletionTime);
}
+ /**
- * Returns the prefetched list with the already performed modifications.
- * <p>If no modification have yet been performed this method will return the fetched list.
- * If some modifications (updates or deletions) have already been done the list returned
- * will be the result of the merge of the fetched list and of the pending mutations.</p>
++ * Returns the prefetched row with the already performed modifications.
++ * <p>If no modifications have yet been performed, this method will return the fetched row, or {@code null} if
++ * the row does not exist. If some modifications (updates or deletions) have already been done, the row returned
++ * will be the result of merging the fetched row with the pending mutations.</p>
+ *
- * @param rowKey the row key
- * @param cql3ColumnName the column name
- * @param cf the pending modifications
- * @return the prefetched list with the already performed modifications
++ * @param key the partition key
++ * @param clustering the row clustering
++ * @return the prefetched row with the already performed modifications
+ */
- public List<Cell> getPrefetchedList(ByteBuffer rowKey, ColumnIdentifier cql3ColumnName, ColumnFamily cf)
+ public Row getPrefetchedRow(DecoratedKey key, Clustering clustering)
{
- if (prefetchedLists == null)
- return Collections.emptyList();
+ if (prefetchedRows == null)
+ return null;
- CQL3Row row = prefetchedLists.get(rowKey);
+ Partition partition = prefetchedRows.get(key);
- return partition == null ? null : partition.searchIterator(ColumnFilter.selection(partition.columns()), false).next(clustering);
++ Row prefetchedRow = partition == null ? null : partition.searchIterator(ColumnFilter.selection(partition.columns()), false).next(clustering);
+
- List<Cell> cql3List = row == null ? Collections.<Cell>emptyList() : row.getMultiCellColumn(cql3ColumnName);
++ // We need to apply the pending mutations to return the row in its current state
++ Row pendingMutations = builder.copy().build();
+
- if (!cf.isEmpty())
- {
- ColumnFamily currentCf = cf.cloneMe();
-
- for (Cell c : cql3List)
- currentCf.addColumn(c);
++ if (pendingMutations.isEmpty())
++ return prefetchedRow;
+
- CFMetaData cfm = currentCf.metadata();
- CQL3Row.RowIterator iterator = cfm.comparator.CQL3RowBuilder(cfm, timestamp).group(currentCf.iterator());
- // We can only update one CQ3Row per partition key at a time (we don't allow IN for clustering key)
- cql3List = iterator.hasNext() ? iterator.next().getMultiCellColumn(cql3ColumnName) : null;
- }
++ if (prefetchedRow == null)
++ return pendingMutations;
+
- return (cql3List == null) ? Collections.<Cell>emptyList() : cql3List;
++ return Rows.merge(prefetchedRow, pendingMutations, nowInSec)
++ .purge(DeletionPurger.PURGE_ALL, nowInSec);
}
}
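
A condensed restatement of the new getPrefetchedRow() tail above, since it is
the heart of the CASSANDRA-13130 fix (names are the ones from the diff; the
comments are editorial):

    // build() resets the builder (see BTreeRow.Builder.reset() in this commit),
    // so we snapshot the statement's in-progress writes on a copy, leaving the
    // original builder free to accept further cells.
    Row pendingMutations = builder.copy().build();
    if (pendingMutations.isEmpty())
        return prefetchedRow;            // nothing written yet by this statement
    if (prefetchedRow == null)
        return pendingMutations;         // the row does not exist on disk
    // Otherwise reconcile what was read with what was already written, dropping
    // anything the pending deletions shadow.
    return Rows.merge(prefetchedRow, pendingMutations, nowInSec)
               .purge(DeletionPurger.PURGE_ALL, nowInSec);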
http://git-wip-us.apache.org/repos/asf/cassandra/blob/aeca1d2b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/rows/BTreeRow.java
index ea1d9e0,0000000..fda33d6
mode 100644,000000..100644
--- a/src/java/org/apache/cassandra/db/rows/BTreeRow.java
+++ b/src/java/org/apache/cassandra/db/rows/BTreeRow.java
@@@ -1,697 -1,0 +1,724 @@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.db.rows;
+
+import java.nio.ByteBuffer;
+import java.util.*;
+import java.util.function.Predicate;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Iterators;
+
+import org.apache.cassandra.config.CFMetaData;
+import org.apache.cassandra.config.ColumnDefinition;
+import org.apache.cassandra.db.*;
+import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
+import org.apache.cassandra.utils.AbstractIterator;
+import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.ObjectSizes;
+import org.apache.cassandra.utils.btree.BTree;
+import org.apache.cassandra.utils.btree.BTreeSearchIterator;
+import org.apache.cassandra.utils.btree.UpdateFunction;
+
+/**
+ * Immutable implementation of a Row object.
+ */
+public class BTreeRow extends AbstractRow
+{
+ private static final long EMPTY_SIZE = ObjectSizes.measure(emptyRow(Clustering.EMPTY));
+
+ private final Clustering clustering;
+ private final LivenessInfo primaryKeyLivenessInfo;
+ private final Deletion deletion;
+
+ // The data for each column present in this row, in column-sorted order.
+ private final Object[] btree;
+
+ // We need to filter the tombstones of a row on every read (twice in fact: first to remove purgeable tombstones, and then after reconciliation to remove
+ // all tombstones since we don't return them to the client) as well as on compaction. But it's likely that many rows won't have any tombstone at all, so
+ // we want to speed up that case by not having to iterate/copy the row in this case. We could keep a single boolean telling us if we have tombstones,
+ // but that doesn't work for expiring columns. So instead we keep the deletion time for the first thing in the row to be deleted. This allows us, at any given
+ // time, to know whether we have any deleted information or not. If we have any "true" tombstone (i.e. not an expiring cell), this value will be forced to
+ // Integer.MIN_VALUE, but if we don't and have expiring cells, this will be the time at which the first expiring cell expires. If we have no tombstones and
+ // no expiring cells, this will be Integer.MAX_VALUE.
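+ // For example (illustrative values): a row whose only deletion-related data is a cell expiring at
+ // localDeletionTime 1000 has minLocalDeletionTime == 1000, while any regular tombstone forces it to
+ // Integer.MIN_VALUE; hasDeletion(nowInSec) then stays a single integer comparison.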
+ private final int minLocalDeletionTime;
+
+ private BTreeRow(Clustering clustering, LivenessInfo primaryKeyLivenessInfo, Deletion deletion, Object[] btree, int minLocalDeletionTime)
+ {
+ assert !deletion.isShadowedBy(primaryKeyLivenessInfo);
+ this.clustering = clustering;
+ this.primaryKeyLivenessInfo = primaryKeyLivenessInfo;
+ this.deletion = deletion;
+ this.btree = btree;
+ this.minLocalDeletionTime = minLocalDeletionTime;
+ }
+
+ private BTreeRow(Clustering clustering, Object[] btree, int minLocalDeletionTime)
+ {
+ this(clustering, LivenessInfo.EMPTY, Deletion.LIVE, btree, minLocalDeletionTime);
+ }
+
+ // Note that it's often easier/safer to use the sortedBuilder/unsortedBuilder or one of the static creation methods below. Only directly useful in a small number of cases.
+ public static BTreeRow create(Clustering clustering, LivenessInfo primaryKeyLivenessInfo, Deletion deletion, Object[] btree)
+ {
+ int minDeletionTime = Math.min(minDeletionTime(primaryKeyLivenessInfo), minDeletionTime(deletion.time()));
+ if (minDeletionTime != Integer.MIN_VALUE)
+ {
+ for (ColumnData cd : BTree.<ColumnData>iterable(btree))
+ minDeletionTime = Math.min(minDeletionTime, minDeletionTime(cd));
+ }
+
+ return new BTreeRow(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
+ }
+
+ public static BTreeRow emptyRow(Clustering clustering)
+ {
+ return new BTreeRow(clustering, BTree.empty(), Integer.MAX_VALUE);
+ }
+
+ public static BTreeRow singleCellRow(Clustering clustering, Cell cell)
+ {
+ if (cell.column().isSimple())
+ return new BTreeRow(clustering, BTree.singleton(cell), minDeletionTime(cell));
+
+ ComplexColumnData complexData = new ComplexColumnData(cell.column(), new Cell[]{ cell }, DeletionTime.LIVE);
+ return new BTreeRow(clustering, BTree.singleton(complexData), minDeletionTime(cell));
+ }
+
+ public static BTreeRow emptyDeletedRow(Clustering clustering, Deletion deletion)
+ {
+ assert !deletion.isLive();
+ return new BTreeRow(clustering, LivenessInfo.EMPTY, deletion, BTree.empty(), Integer.MIN_VALUE);
+ }
+
+ public static BTreeRow noCellLiveRow(Clustering clustering, LivenessInfo primaryKeyLivenessInfo)
+ {
+ assert !primaryKeyLivenessInfo.isEmpty();
+ return new BTreeRow(clustering, primaryKeyLivenessInfo, Deletion.LIVE, BTree.empty(), minDeletionTime(primaryKeyLivenessInfo));
+ }
+
+ private static int minDeletionTime(Cell cell)
+ {
+ return cell.isTombstone() ? Integer.MIN_VALUE : cell.localDeletionTime();
+ }
+
+ private static int minDeletionTime(LivenessInfo info)
+ {
+ return info.isExpiring() ? info.localExpirationTime() : Integer.MAX_VALUE;
+ }
+
+ private static int minDeletionTime(DeletionTime dt)
+ {
+ return dt.isLive() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
+ }
+
+ private static int minDeletionTime(ComplexColumnData cd)
+ {
+ int min = minDeletionTime(cd.complexDeletion());
+ for (Cell cell : cd)
+ {
+ min = Math.min(min, minDeletionTime(cell));
+ if (min == Integer.MIN_VALUE)
+ break;
+ }
+ return min;
+ }
+
+ private static int minDeletionTime(ColumnData cd)
+ {
+ return cd.column().isSimple() ? minDeletionTime((Cell) cd) : minDeletionTime((ComplexColumnData)cd);
+ }
+
+ private static int minDeletionTime(Object[] btree, LivenessInfo info, DeletionTime rowDeletion)
+ {
+ int min = Math.min(minDeletionTime(info), minDeletionTime(rowDeletion));
+ for (ColumnData cd : BTree.<ColumnData>iterable(btree))
+ {
+ min = Math.min(min, minDeletionTime(cd));
+ if (min == Integer.MIN_VALUE)
+ break;
+ }
+ return min;
+ }
+
+ public Clustering clustering()
+ {
+ return clustering;
+ }
+
+ public Collection<ColumnDefinition> columns()
+ {
+ return Collections2.transform(this, ColumnData::column);
+ }
+
+ public LivenessInfo primaryKeyLivenessInfo()
+ {
+ return primaryKeyLivenessInfo;
+ }
+
+ public boolean isEmpty()
+ {
+ return primaryKeyLivenessInfo().isEmpty()
+ && deletion().isLive()
+ && BTree.isEmpty(btree);
+ }
+
+ public Deletion deletion()
+ {
+ return deletion;
+ }
+
+ public Cell getCell(ColumnDefinition c)
+ {
+ assert !c.isComplex();
+ return (Cell) BTree.<Object>find(btree, ColumnDefinition.asymmetricColumnDataComparator, c);
+ }
+
+ public Cell getCell(ColumnDefinition c, CellPath path)
+ {
+ assert c.isComplex();
+ ComplexColumnData cd = getComplexColumnData(c);
+ if (cd == null)
+ return null;
+ return cd.getCell(path);
+ }
+
+ public ComplexColumnData getComplexColumnData(ColumnDefinition c)
+ {
+ assert c.isComplex();
+ return (ComplexColumnData) BTree.<Object>find(btree, ColumnDefinition.asymmetricColumnDataComparator, c);
+ }
+
+ public int size()
+ {
+ return BTree.size(btree);
+ }
+
+ public Iterator<ColumnData> iterator()
+ {
+ return searchIterator();
+ }
+
+ public Iterable<Cell> cells()
+ {
+ return CellIterator::new;
+ }
+
+ public BTreeSearchIterator<ColumnDefinition, ColumnData> searchIterator()
+ {
+ return BTree.slice(btree, ColumnDefinition.asymmetricColumnDataComparator, BTree.Dir.ASC);
+ }
+
+ public Row filter(ColumnFilter filter, CFMetaData metadata)
+ {
+ return filter(filter, DeletionTime.LIVE, false, metadata);
+ }
+
+ public Row filter(ColumnFilter filter, DeletionTime activeDeletion, boolean setActiveDeletionToRow, CFMetaData metadata)
+ {
+ Map<ByteBuffer, CFMetaData.DroppedColumn> droppedColumns = metadata.getDroppedColumns();
+
+ if (filter.includesAllColumns() && (activeDeletion.isLive() || deletion.supersedes(activeDeletion)) && droppedColumns.isEmpty())
+ return this;
+
+ boolean mayHaveShadowed = activeDeletion.supersedes(deletion.time());
+
+ LivenessInfo newInfo = primaryKeyLivenessInfo;
+ Deletion newDeletion = deletion;
+ if (mayHaveShadowed)
+ {
+ if (activeDeletion.deletes(newInfo.timestamp()))
+ newInfo = LivenessInfo.EMPTY;
+ // note that mayHaveShadowed means the activeDeletion shadows the row deletion. So if we don't have setActiveDeletionToRow,
+ // the row deletion is shadowed and we shouldn't return it.
+ newDeletion = setActiveDeletionToRow ? Deletion.regular(activeDeletion) : Deletion.LIVE;
+ }
+
+ Columns columns = filter.fetchedColumns().columns(isStatic());
+ Predicate<ColumnDefinition> inclusionTester = columns.inOrderInclusionTester();
+ return transformAndFilter(newInfo, newDeletion, (cd) -> {
+
+ ColumnDefinition column = cd.column();
+ if (!inclusionTester.test(column))
+ return null;
+
+ CFMetaData.DroppedColumn dropped = droppedColumns.get(column.name.bytes);
+ if (column.isComplex())
+ return ((ComplexColumnData) cd).filter(filter, mayHaveShadowed ? activeDeletion : DeletionTime.LIVE, dropped);
+
+ Cell cell = (Cell) cd;
+ return (dropped == null || cell.timestamp() > dropped.droppedTime) && !(mayHaveShadowed && activeDeletion.deletes(cell))
+ ? cell : null;
+ });
+ }
+
+ public boolean hasComplex()
+ {
+ // We start from the end because we know complex columns sort after the simple ones
+ ColumnData cd = Iterables.getFirst(BTree.<ColumnData>iterable(btree, BTree.Dir.DESC), null);
+ return cd != null && cd.column.isComplex();
+ }
+
+ public boolean hasComplexDeletion()
+ {
+ // We start from the end because we know complex columns sort after the simple ones, so a descending iteration sees them first
+ for (ColumnData cd : BTree.<ColumnData>iterable(btree, BTree.Dir.DESC))
+ {
+ if (cd.column().isSimple())
+ return false;
+
+ if (!((ComplexColumnData)cd).complexDeletion().isLive())
+ return true;
+ }
+ return false;
+ }
+
+ public Row markCounterLocalToBeCleared()
+ {
+ return transformAndFilter(primaryKeyLivenessInfo, deletion, (cd) -> cd.column().cellValueType().isCounter()
+ ? cd.markCounterLocalToBeCleared()
+ : cd);
+ }
+
+ public boolean hasDeletion(int nowInSec)
+ {
+ return nowInSec >= minLocalDeletionTime;
+ }
+
+ /**
+ * Returns a copy of the row where all timestamps for live data have been replaced by {@code newTimestamp} and
+ * all deletion timestamps by {@code newTimestamp - 1}.
+ *
+ * This exists for the Paxos path, see {@link PartitionUpdate#updateAllTimestamp} for additional details.
+ */
+ public Row updateAllTimestamp(long newTimestamp)
+ {
+ LivenessInfo newInfo = primaryKeyLivenessInfo.isEmpty() ? primaryKeyLivenessInfo : primaryKeyLivenessInfo.withUpdatedTimestamp(newTimestamp);
+ // If the deletion is shadowable and the row has a timestamp, we'll force the deletion timestamp to be less than the row's, so we
+ // should get rid of said deletion.
+ Deletion newDeletion = deletion.isLive() || (deletion.isShadowable() && !primaryKeyLivenessInfo.isEmpty())
+ ? Deletion.LIVE
+ : new Deletion(new DeletionTime(newTimestamp - 1, deletion.time().localDeletionTime()), deletion.isShadowable());
+
+ return transformAndFilter(newInfo, newDeletion, (cd) -> cd.updateAllTimestamp(newTimestamp));
+ }
+
+ public Row withRowDeletion(DeletionTime newDeletion)
+ {
+ // Note that:
+ // - it is a contract with the caller that the new deletion shouldn't shadow anything in
+ // the row, and so in particular it can't shadow the row deletion. So if there is
+ // already a row deletion we have nothing to do.
+ // - we set the minLocalDeletionTime to MIN_VALUE because we know the new deletion is not live
+ return newDeletion.isLive() || !deletion.isLive()
+ ? this
+ : new BTreeRow(clustering, primaryKeyLivenessInfo, Deletion.regular(newDeletion), btree, Integer.MIN_VALUE);
+ }
+
+ public Row purge(DeletionPurger purger, int nowInSec)
+ {
+ if (!hasDeletion(nowInSec))
+ return this;
+
+ LivenessInfo newInfo = purger.shouldPurge(primaryKeyLivenessInfo, nowInSec) ? LivenessInfo.EMPTY : primaryKeyLivenessInfo;
+ Deletion newDeletion = purger.shouldPurge(deletion.time()) ? Deletion.LIVE : deletion;
+
+ return transformAndFilter(newInfo, newDeletion, (cd) -> cd.purge(purger, nowInSec));
+ }
+
+ private Row transformAndFilter(LivenessInfo info, Deletion deletion, Function<ColumnData, ColumnData> function)
+ {
+ Object[] transformed = BTree.transformAndFilter(btree, function);
+
+ if (btree == transformed && info == this.primaryKeyLivenessInfo && deletion == this.deletion)
+ return this;
+
+ if (info.isEmpty() && deletion.isLive() && BTree.isEmpty(transformed))
+ return null;
+
+ int minDeletionTime = minDeletionTime(transformed, info, deletion.time());
+ return new BTreeRow(clustering, info, deletion, transformed, minDeletionTime);
+ }
+
+ public int dataSize()
+ {
+ int dataSize = clustering.dataSize()
+ + primaryKeyLivenessInfo.dataSize()
+ + deletion.dataSize();
+
+ for (ColumnData cd : this)
+ dataSize += cd.dataSize();
+ return dataSize;
+ }
+
+ public long unsharedHeapSizeExcludingData()
+ {
+ long heapSize = EMPTY_SIZE
+ + clustering.unsharedHeapSizeExcludingData()
+ + BTree.sizeOfStructureOnHeap(btree);
+
+ for (ColumnData cd : this)
+ heapSize += cd.unsharedHeapSizeExcludingData();
+ return heapSize;
+ }
+
+ public static Row.Builder sortedBuilder()
+ {
+ return new Builder(true);
+ }
+
+ public static Row.Builder unsortedBuilder(int nowInSec)
+ {
+ return new Builder(false, nowInSec);
+ }
+
+ // This is only used by PartitionUpdate.CounterMark but other uses should be avoided as much as possible as it breaks our general
+ // assumption that Row objects are immutable. This method should go away post-#6506 in particular.
+ // This method is in particular not exposed by the Row API on purpose.
+ // This method also *assumes* that the cell we're setting already exists.
+ public void setValue(ColumnDefinition column, CellPath path, ByteBuffer value)
+ {
+ ColumnData current = (ColumnData) BTree.<Object>find(btree, ColumnDefinition.asymmetricColumnDataComparator, column);
+ if (column.isSimple())
+ BTree.replaceInSitu(btree, ColumnData.comparator, current, ((Cell) current).withUpdatedValue(value));
+ else
+ ((ComplexColumnData) current).setValue(path, value);
+ }
+
+ public Iterable<Cell> cellsInLegacyOrder(CFMetaData metadata, boolean reversed)
+ {
+ return () -> new CellInLegacyOrderIterator(metadata, reversed);
+ }
+
+ private class CellIterator extends AbstractIterator<Cell>
+ {
+ private Iterator<ColumnData> columnData = iterator();
+ private Iterator<Cell> complexCells;
+
+ protected Cell computeNext()
+ {
+ while (true)
+ {
+ if (complexCells != null)
+ {
+ if (complexCells.hasNext())
+ return complexCells.next();
+
+ complexCells = null;
+ }
+
+ if (!columnData.hasNext())
+ return endOfData();
+
+ ColumnData cd = columnData.next();
+ if (cd.column().isComplex())
+ complexCells = ((ComplexColumnData)cd).iterator();
+ else
+ return (Cell)cd;
+ }
+ }
+ }
+
+ private class CellInLegacyOrderIterator extends AbstractIterator<Cell>
+ {
+ private final Comparator<ByteBuffer> comparator;
+ private final boolean reversed;
+ private final int firstComplexIdx;
+ private int simpleIdx;
+ private int complexIdx;
+ private Iterator<Cell> complexCells;
+ private final Object[] data;
+
+ private CellInLegacyOrderIterator(CFMetaData metadata, boolean reversed)
+ {
+ AbstractType<?> nameComparator = metadata.getColumnDefinitionNameComparator(isStatic() ? ColumnDefinition.Kind.STATIC : ColumnDefinition.Kind.REGULAR);
+ this.comparator = reversed ? Collections.reverseOrder(nameComparator) : nameComparator;
+ this.reversed = reversed;
+
+ // copy btree into array for simple separate iteration of simple and complex columns
+ this.data = new Object[BTree.size(btree)];
+ BTree.toArray(btree, data, 0);
+
+ int idx = Iterators.indexOf(Iterators.forArray(data), cd -> cd instanceof ComplexColumnData);
+ this.firstComplexIdx = idx < 0 ? data.length : idx;
+ this.complexIdx = firstComplexIdx;
+ }
+
+ private int getSimpleIdx()
+ {
+ return reversed ? firstComplexIdx - simpleIdx - 1 : simpleIdx;
+ }
+
+ private int getSimpleIdxAndIncrement()
+ {
+ int idx = getSimpleIdx();
+ ++simpleIdx;
+ return idx;
+ }
+
+ private int getComplexIdx()
+ {
+ return reversed ? data.length + firstComplexIdx - complexIdx - 1 : complexIdx;
+ }
+
+ private int getComplexIdxAndIncrement()
+ {
+ int idx = getComplexIdx();
+ ++complexIdx;
+ return idx;
+ }
+
+ private Iterator<Cell> makeComplexIterator(Object complexData)
+ {
+ ComplexColumnData ccd = (ComplexColumnData)complexData;
+ return reversed ? ccd.reverseIterator() : ccd.iterator();
+ }
+
+ protected Cell computeNext()
+ {
+ while (true)
+ {
+ if (complexCells != null)
+ {
+ if (complexCells.hasNext())
+ return complexCells.next();
+
+ complexCells = null;
+ }
+
+ if (simpleIdx >= firstComplexIdx)
+ {
+ if (complexIdx >= data.length)
+ return endOfData();
+
+ complexCells = makeComplexIterator(data[getComplexIdxAndIncrement()]);
+ }
+ else
+ {
+ if (complexIdx >= data.length)
+ return (Cell)data[getSimpleIdxAndIncrement()];
+
+ if (comparator.compare(((ColumnData) data[getSimpleIdx()]).column().name.bytes, ((ColumnData) data[getComplexIdx()]).column().name.bytes) < 0)
+ return (Cell)data[getSimpleIdxAndIncrement()];
+ else
+ complexCells = makeComplexIterator(data[getComplexIdxAndIncrement()]);
+ }
+ }
+ }
+ }
+
+ public static class Builder implements Row.Builder
+ {
+ // a simple marker class that will sort to the beginning of a run of complex cells to store the deletion time
+ private static class ComplexColumnDeletion extends BufferCell
+ {
+ public ComplexColumnDeletion(ColumnDefinition column, DeletionTime deletionTime)
+ {
+ super(column, deletionTime.markedForDeleteAt(), 0, deletionTime.localDeletionTime(), ByteBufferUtil.EMPTY_BYTE_BUFFER, CellPath.BOTTOM);
+ }
+ }
+
+ // converts a run of Cells with the same column into a single ColumnData
+ private static class CellResolver implements BTree.Builder.Resolver
+ {
+ final int nowInSec;
+ private CellResolver(int nowInSec)
+ {
+ this.nowInSec = nowInSec;
+ }
+
+ public ColumnData resolve(Object[] cells, int lb, int ub)
+ {
+ Cell cell = (Cell) cells[lb];
+ ColumnDefinition column = cell.column;
+ if (cell.column.isSimple())
+ {
+ assert lb + 1 == ub || nowInSec != Integer.MIN_VALUE;
+ while (++lb < ub)
+ cell = Cells.reconcile(cell, (Cell) cells[lb], nowInSec);
+ return cell;
+ }
+
+ // TODO: relax this in the case our outer provider is sorted (want to delay until remaining changes are
+ // bedded in, as less important; galloping makes it pretty cheap anyway)
+ Arrays.sort(cells, lb, ub, (Comparator<Object>) column.cellComparator());
+ DeletionTime deletion = DeletionTime.LIVE;
+ // Deal with complex deletion (for which we've used "fake" ComplexColumnDeletion cells that we need to remove).
+ // Note that in almost all cases we'll have at most one of those fake cells, but the contract of {{Row.Builder.addComplexDeletion}}
+ // does not forbid it being called twice (especially in the unsorted case) and this can actually happen when reading
+ // legacy sstables (see #10743).
+ while (lb < ub)
+ {
+ cell = (Cell) cells[lb];
+ if (!(cell instanceof ComplexColumnDeletion))
+ break;
+
+ if (cell.timestamp() > deletion.markedForDeleteAt())
+ deletion = new DeletionTime(cell.timestamp(), cell.localDeletionTime());
+ lb++;
+ }
+
- List<Object> buildFrom = Arrays.asList(cells).subList(lb, ub);
- if (deletion != DeletionTime.LIVE)
++ List<Object> buildFrom = new ArrayList<>(ub - lb);
++ Cell previous = null;
++ for (int i = lb; i < ub; i++)
+ {
- // Make sure we don't include any shadowed cells
- List<Object> filtered = new ArrayList<>(buildFrom.size());
- for (Object c : buildFrom)
++ Cell c = (Cell) cells[i];
++
++ if (deletion == DeletionTime.LIVE || c.timestamp() >= deletion.markedForDeleteAt())
+ {
- if (((Cell)c).timestamp() >= deletion.markedForDeleteAt())
- filtered.add(c);
++ if (previous != null && column.cellComparator().compare(previous, c) == 0)
++ {
++ c = Cells.reconcile(previous, c, nowInSec);
++ buildFrom.set(buildFrom.size() - 1, c);
++ }
++ else
++ {
++ buildFrom.add(c);
++ }
++ previous = c;
+ }
- buildFrom = filtered;
+ }
++
+ Object[] btree = BTree.build(buildFrom, UpdateFunction.noOp());
+ return new ComplexColumnData(column, btree, deletion);
+ }
+
+ };
+ protected Clustering clustering;
+ protected LivenessInfo primaryKeyLivenessInfo = LivenessInfo.EMPTY;
+ protected Deletion deletion = Deletion.LIVE;
+
+ private final boolean isSorted;
+ private final BTree.Builder<Cell> cells;
+ private final CellResolver resolver;
+ private boolean hasComplex = false;
+
+ // For complex column at index i of 'columns', we store at complexDeletions[i] its complex deletion.
+
+ protected Builder(boolean isSorted)
+ {
+ this(isSorted, Integer.MIN_VALUE);
+ }
+
+ protected Builder(boolean isSorted, int nowInSecs)
+ {
+ this.cells = BTree.builder(ColumnData.comparator);
+ resolver = new CellResolver(nowInSecs);
+ this.isSorted = isSorted;
+ this.cells.auto(false);
+ }
+
++ protected Builder(Builder builder)
++ {
++ clustering = builder.clustering;
++ primaryKeyLivenessInfo = builder.primaryKeyLivenessInfo;
++ deletion = builder.deletion;
++ cells = builder.cells.copy();
++ resolver = builder.resolver;
++ isSorted = builder.isSorted;
++ hasComplex = builder.hasComplex;
++ }
++
++ @Override
++ public Builder copy()
++ {
++ return new Builder(this);
++ }
++
+ public boolean isSorted()
+ {
+ return isSorted;
+ }
+
+ public void newRow(Clustering clustering)
+ {
+ assert this.clustering == null; // Ensures we've properly called build() if we've used this builder before
+ this.clustering = clustering;
+ }
+
+ public Clustering clustering()
+ {
+ return clustering;
+ }
+
+ protected void reset()
+ {
+ this.clustering = null;
+ this.primaryKeyLivenessInfo = LivenessInfo.EMPTY;
+ this.deletion = Deletion.LIVE;
+ this.cells.reuse();
++ this.hasComplex = false;
+ }
+
+ public void addPrimaryKeyLivenessInfo(LivenessInfo info)
+ {
+ // The check is only required for unsorted builders, but it's worth the extra safety to have it unconditional
+ if (!deletion.deletes(info))
+ this.primaryKeyLivenessInfo = info;
+ }
+
+ public void addRowDeletion(Deletion deletion)
+ {
+ this.deletion = deletion;
+ // The check is only required for unsorted builders, but it's worth the extra safety to have it unconditional
+ if (deletion.deletes(primaryKeyLivenessInfo))
+ this.primaryKeyLivenessInfo = LivenessInfo.EMPTY;
+ }
+
+ public void addCell(Cell cell)
+ {
+ assert cell.column().isStatic() == (clustering == Clustering.STATIC_CLUSTERING) : "Column is " + cell.column() + ", clustering = " + clustering;
+ // In practice, only unsorted builders have to deal with shadowed cells, but it doesn't cost us much to deal with it unconditionally in this case
+ if (deletion.deletes(cell))
+ return;
+
+ cells.add(cell);
+ hasComplex |= cell.column.isComplex();
+ }
+
+ public void addComplexDeletion(ColumnDefinition column, DeletionTime complexDeletion)
+ {
+ cells.add(new ComplexColumnDeletion(column, complexDeletion));
+ hasComplex = true;
+ }
+
+ public Row build()
+ {
+ if (!isSorted)
+ cells.sort();
+ // we can avoid resolving if we're sorted and have no complex values
+ // (because we'll only have unique simple cells, which are already in their final condition)
+ if (!isSorted | hasComplex)
+ cells.resolve(resolver);
+ Object[] btree = cells.build();
+
+ if (deletion.isShadowedBy(primaryKeyLivenessInfo))
+ deletion = Deletion.LIVE;
+
+ int minDeletionTime = minDeletionTime(btree, primaryKeyLivenessInfo, deletion.time());
+ Row row = new BTreeRow(clustering, primaryKeyLivenessInfo, deletion, btree, minDeletionTime);
+ reset();
+ return row;
+ }
+
+ }
+}
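
The CellResolver change above is what de-duplicates repeated writes to the same
list cell: within the run of cells for one complex column, cells that compare
equal under column.cellComparator() are now collapsed through Cells.reconcile()
instead of both being kept. A hedged sketch of the rule this relies on
(listColumn, valueA, valueB, path and nowInSec are hypothetical):

    // Cells.reconcile keeps the winner under the usual reconciliation rules,
    // the higher timestamp taking precedence.
    Cell first  = BufferCell.live(metadata, listColumn, 1L, valueA, path);
    Cell second = BufferCell.live(metadata, listColumn, 2L, valueB, path);
    Cell winner = Cells.reconcile(first, second, nowInSec);
    assert winner.timestamp() == 2L; // the later write survives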
http://git-wip-us.apache.org/repos/asf/cassandra/blob/aeca1d2b/src/java/org/apache/cassandra/db/rows/Row.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/rows/Row.java
index c7c3216,0000000..74d8664
mode 100644,000000..100644
--- a/src/java/org/apache/cassandra/db/rows/Row.java
+++ b/src/java/org/apache/cassandra/db/rows/Row.java
@@@ -1,690 -1,0 +1,696 @@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.db.rows;
+
+import java.util.*;
+import java.security.MessageDigest;
+
+import org.apache.cassandra.config.CFMetaData;
+import org.apache.cassandra.config.ColumnDefinition;
+import org.apache.cassandra.db.*;
+import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.service.paxos.Commit;
+import org.apache.cassandra.utils.FBUtilities;
+import org.apache.cassandra.utils.MergeIterator;
+import org.apache.cassandra.utils.SearchIterator;
+import org.apache.cassandra.utils.btree.BTree;
+import org.apache.cassandra.utils.btree.UpdateFunction;
+
+/**
+ * Storage engine representation of a row.
+ *
+ * A row mainly contains the following information:
+ * 1) Its {@code Clustering}, which holds the values for the clustering columns identifying the row.
+ * 2) Its row level information: the primary key liveness info and the row deletion (see
+ * {@link #primaryKeyLivenessInfo()} and {@link #deletion()} for more details).
+ * 3) Data for the columns it contains, or in other words, it's a (sorted) collection of
+ * {@code ColumnData}.
+ *
+ * Also note that as for every other storage engine object, a {@code Row} object cannot shadow
+ * its own data. For instance, a {@code Row} cannot contain a cell that is deleted by its own
+ * row deletion.
+ */
+public interface Row extends Unfiltered, Collection<ColumnData>
+{
+ /**
+ * The clustering values for this row.
+ */
+ @Override
+ public Clustering clustering();
+
+ /**
+ * An in-natural-order collection of the columns for which data (incl. simple tombstones)
+ * is present in this row.
+ */
+ public Collection<ColumnDefinition> columns();
+
+ /**
+ * The row deletion.
+ *
+ * This corresponds to the last row deletion done on this row.
+ *
+ * @return the row deletion.
+ */
+ public Deletion deletion();
+
+ /**
+ * Liveness information for the primary key columns of this row.
+ * <p>
+ * As a row is uniquely identified by its primary key, all its primary key columns
+ * share the same {@code LivenessInfo}. This liveness information is what allows us
+ * to distinguish between a dead row (it has no live cells and its primary key liveness
+ * info is empty) and a live row but where all non PK columns are null (it has no
+ * live cells, but its primary key liveness is not empty). Please note that the liveness
+ * info (including its eventual ttl/local deletion time) only applies to the primary key
+ * columns and has no impact on the row content.
+ * <p>
+ * Note in particular that a row may have live cells but no PK liveness info, because the
+ * primary key liveness information is only set on {@code INSERT} (which makes sense
+ * in itself, see #6782) but live cells can be added through {@code UPDATE} even if the row
+ * wasn't pre-existing (which users are encouraged not to do, but we can't validate).
+ */
+ public LivenessInfo primaryKeyLivenessInfo();
+
+ /**
+ * Whether the row corresponds to a static row or not.
+ *
+ * @return whether the row corresponds to a static row or not.
+ */
+ public boolean isStatic();
+
+ /**
+ * Whether the row has no information whatsoever. This means no PK liveness info, no row
+ * deletion, no cells and no complex deletion info.
+ *
+ * @return {@code true} if the row has no data, {@code false} otherwise.
+ */
+ public boolean isEmpty();
+
+ /**
+ * Whether the row has some live information (i.e. it's not just deletion information).
+ */
+ public boolean hasLiveData(int nowInSec);
+
+ /**
+ * Returns a cell for a simple column.
+ *
+ * @param c the simple column for which to fetch the cell.
+ * @return the corresponding cell or {@code null} if the row has no such cell.
+ */
+ public Cell getCell(ColumnDefinition c);
+
+ /**
+ * Return a cell for a given complex column and cell path.
+ *
+ * @param c the complex column for which to fetch the cell.
+ * @param path the cell path for which to fetch the cell.
+ * @return the corresponding cell or {@code null} if the row has no such cell.
+ */
+ public Cell getCell(ColumnDefinition c, CellPath path);
+
+ /**
+ * The data for a complex column.
+ * <p>
+ * The returned object groups all the cells for the column, as well as its complex deletion (if relevant).
+ *
+ * @param c the complex column for which to return the complex data.
+ * @return the data for {@code c} or {@code null} if the row has no data for this column.
+ */
+ public ComplexColumnData getComplexColumnData(ColumnDefinition c);
+
+ /**
+ * An iterable over the cells of this row.
+ * <p>
+ * The iterable guarantees that cells are returned in order of {@link Cell#comparator}.
+ *
+ * @return an iterable over the cells of this row.
+ */
+ public Iterable<Cell> cells();
+
+ /**
+ * An iterable over the cells of this row that return cells in "legacy order".
+ * <p>
+ * In 3.0+, columns are sorted so that all simple columns are before all complex columns. Previously
+ * however, the cells were just sorted by the column name. This iterator returns cells in that
+ * legacy order. It's only ever meaningful for backward/thrift compatibility code.
+ *
+ * @param metadata the table this is a row of.
+ * @param reversed whether cells should be returned in reverse order.
+ * @return an iterable over the cells of this row in "legacy order".
+ */
+ public Iterable<Cell> cellsInLegacyOrder(CFMetaData metadata, boolean reversed);
+
+ /**
+ * Whether the row stores any (non-live) complex deletion for any complex column.
+ */
+ public boolean hasComplexDeletion();
+
+ /**
+ * Whether the row stores any (non-RT) data for any complex column.
+ */
+ boolean hasComplex();
+
+ /**
+ * Whether the row has any deletion info (row deletion, cell tombstone, expired cell or complex deletion).
+ *
+ * @param nowInSec the current time in seconds to decide if a cell is expired.
+ */
+ public boolean hasDeletion(int nowInSec);
+
+ /**
+ * An iterator to efficiently search data for a given column.
+ *
+ * @return a search iterator for the cells of this row.
+ */
+ public SearchIterator<ColumnDefinition, ColumnData> searchIterator();
+
+ /**
+ * Returns a copy of this row that:
+ * 1) only includes the data for the columns included by {@code filter}.
+ * 2) doesn't include any data that belongs to a dropped column (recorded in {@code metadata}).
+ */
+ public Row filter(ColumnFilter filter, CFMetaData metadata);
+
+ /**
+ * Returns a copy of this row that:
+ * 1) only includes the data for the columns included by {@code filter}.
+ * 2) doesn't include any data that belongs to a dropped column (recorded in {@code metadata}).
+ * 3) doesn't include any data that is shadowed/deleted by {@code activeDeletion}.
+ * 4) uses {@code activeDeletion} as row deletion iff {@code setActiveDeletionToRow} and {@code activeDeletion} supersedes the row deletion.
+ */
+ public Row filter(ColumnFilter filter, DeletionTime activeDeletion, boolean setActiveDeletionToRow, CFMetaData metadata);
+
+ /**
+ * Returns a copy of this row without any deletion info that should be purged according to {@code purger}.
+ *
+ * @param purger the {@code DeletionPurger} to use to decide what can be purged.
+ * @param nowInSec the current time to decide what is deleted and what isn't (in the case of expired cells).
+ * @return this row but without any deletion info purged by {@code purger}. If the purged row is empty, returns
+ * {@code null}.
+ */
+ public Row purge(DeletionPurger purger, int nowInSec);
+
+ /**
+ * Returns a copy of this row where all counter cells have their "local" shard marked for clearing.
+ */
+ public Row markCounterLocalToBeCleared();
+
+ /**
+ * Returns a copy of this row where all live timestamps have been replaced by {@code newTimestamp} and every deletion
+ * timestamp by {@code newTimestamp - 1}.
+ *
+ * @param newTimestamp the timestamp to use for all live data in the returned row.
+ * @return a copy of this row with timestamps updated using {@code newTimestamp}. This can return {@code null} in the
+ * rare case where the row only has a shadowable row deletion and the new timestamp supersedes it.
+ *
+ * @see Commit for why we need this.
+ */
+ public Row updateAllTimestamp(long newTimestamp);
+
+ /**
+ * Returns a copy of this row with the new deletion as row deletion if it is more recent
+ * than the current row deletion.
+ * <p>
+ * WARNING: this method <b>does not</b> check that nothing in the row is shadowed by the provided
+ * deletion and if that is the case, the created row will be <b>invalid</b>. It is thus up to the
+ * caller to verify that this is not the case and the only reasonable use case of this is probably
+ * when the row and the deletion come from the same {@code UnfilteredRowIterator} since that gives
+ * us this guarantee.
+ */
+ public Row withRowDeletion(DeletionTime deletion);
+
+ public int dataSize();
+
+ public long unsharedHeapSizeExcludingData();
+
+ public String toString(CFMetaData metadata, boolean fullDetails);
+
+ /**
+ * A row deletion/tombstone.
+ * <p>
+ * A row deletion mostly consists of the time of said deletion, but there are 2 variants: shadowable
+ * and regular row deletion.
+ * <p>
+ * A shadowable row deletion only exists if the row has no timestamp. In other words, the deletion is only
+ * valid as long as no newer insert is done (thus setting a row timestamp; note that if the row timestamp set
+ * is lower than the deletion, it is shadowed (and thus ignored) as usual).
+ * <p>
+ * That is, if a row has a shadowable deletion with timestamp A and an update is made to that row with a
+ * timestamp B such that B > A (and that update sets the row timestamp), then the shadowable deletion is 'shadowed'
+ * by that update. A concrete consequence is that if said update has cells with timestamp lower than A, then those
+ * cells are preserved (since the deletion is removed), contrary to a normal (regular) deletion where the
+ * deletion is preserved and such cells are removed.
+ * <p>
+ * Currently, the only use of shadowable row deletions is Materialized Views, see CASSANDRA-10261.
+ */
+ public static class Deletion
+ {
+ public static final Deletion LIVE = new Deletion(DeletionTime.LIVE, false);
+
+ private final DeletionTime time;
+ private final boolean isShadowable;
+
+ public Deletion(DeletionTime time, boolean isShadowable)
+ {
+ assert !time.isLive() || !isShadowable;
+ this.time = time;
+ this.isShadowable = isShadowable;
+ }
+
+ public static Deletion regular(DeletionTime time)
+ {
+ return time.isLive() ? LIVE : new Deletion(time, false);
+ }
+
+ public static Deletion shadowable(DeletionTime time)
+ {
+ return new Deletion(time, true);
+ }
+
+ /**
+ * The time of the row deletion.
+ *
+ * @return the time of the row deletion.
+ */
+ public DeletionTime time()
+ {
+ return time;
+ }
+
+ /**
+ * Whether the deletion is a shadowable one or not.
+ *
+ * @return whether the deletion is a shadowable one. Note that if {@code isLive()}, then this is
+ * guaranteed to return {@code false}.
+ */
+ public boolean isShadowable()
+ {
+ return isShadowable;
+ }
+
+ /**
+ * Whether the deletion is live or not, that is, whether it is an actual deletion or not.
+ *
+ * @return {@code true} if this represents no deletion of the row, {@code false} if that's an actual
+ * deletion.
+ */
+ public boolean isLive()
+ {
+ return time().isLive();
+ }
+
+ public boolean supersedes(DeletionTime that)
+ {
+ return time.supersedes(that);
+ }
+
+ public boolean supersedes(Deletion that)
+ {
+ return time.supersedes(that.time);
+ }
+
+ public boolean isShadowedBy(LivenessInfo primaryKeyLivenessInfo)
+ {
+ return isShadowable && primaryKeyLivenessInfo.timestamp() > time.markedForDeleteAt();
+ }
+
+ public boolean deletes(LivenessInfo info)
+ {
+ return time.deletes(info);
+ }
+
+ public boolean deletes(Cell cell)
+ {
+ return time.deletes(cell);
+ }
+
+ public void digest(MessageDigest digest)
+ {
+ time.digest(digest);
+ FBUtilities.updateWithBoolean(digest, isShadowable);
+ }
+
+ public int dataSize()
+ {
+ return time.dataSize() + 1;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if(!(o instanceof Deletion))
+ return false;
+ Deletion that = (Deletion)o;
+ return this.time.equals(that.time) && this.isShadowable == that.isShadowable;
+ }
+
+ @Override
+ public final int hashCode()
+ {
+ return Objects.hash(time, isShadowable);
+ }
+
+ @Override
+ public String toString()
+ {
+ return String.format("%s%s", time, isShadowable ? "(shadowable)" : "");
+ }
+ }
+
+ /**
+ * Interface for building rows.
+ * <p>
+ * The builder of a row should always abide by the following rules:
+ * 1) {@link #newRow} is always called as the first thing for the row.
+ * 2) {@link #addPrimaryKeyLivenessInfo} and {@link #addRowDeletion}, if called, are called before
+ * any {@link #addCell}/{@link #addComplexDeletion} call.
+ * 3) {@link #build} is called to construct the new row. The builder can then be reused.
+ *
+ * There are 2 variants of a builder: sorted and unsorted ones. A sorted builder expects the user to abide by the
+ * following additional rules:
+ * 4) Calls to {@link #addCell}/{@link #addComplexDeletion} are done in strictly increasing column order.
+ * In other words, all calls to these methods for a given column {@code c} are done after any call for
+ * any column before {@code c} and before any call for any column after {@code c}.
+ * 5) Calls to {@link #addCell} are further done in strictly increasing cell order (the one defined by
+ * {@link Cell#comparator}). That is, for a given column, cells are passed in {@code CellPath} order.
+ * 6) No shadowed data should be added. Concretely, this means that if a row deletion is added, it doesn't
+ * delete the row timestamp or any cell added later, and similarly no cell added is deleted by the complex
+ * deletion of the column it is a cell of.
+ *
+ * An unsorted builder will not expect those last rules however: {@link #addCell} and {@link #addComplexDeletion}
+ * can be done in any order. And in particular an unsorted builder allows multiple calls for the same column/cell. In
+ * that latter case, the result will follow the usual reconciliation rules (so equal cells are reconciled with
+ * {@link Cells#reconcile} and the "biggest" of multiple complex deletion for the same column wins).
+ */
+ public interface Builder
+ {
+ /**
++ * Creates a copy of this {@code Builder}.
++ * @return a copy of this {@code Builder}
++ */
++ public Builder copy();
++
++ /**
+ * Whether the builder is a sorted one or not.
+ *
+ * @return whether the builder requires calls to be done in sorted order or not (see above).
+ */
+ public boolean isSorted();
+
+ /**
+ * Prepares the builder to build a new row of clustering {@code clustering}.
+ * <p>
+ * This should always be the first call for a given row.
+ *
+ * @param clustering the clustering for the new row.
+ */
+ public void newRow(Clustering clustering);
+
+ /**
+ * The clustering for the row that is currently being built.
+ *
+ * @return the clustering for the row that is currently being built, or {@code null} if {@link #newRow} hasn't
+ * yet been called.
+ */
+ public Clustering clustering();
+
+ /**
+ * Adds the liveness information for the primary key columns of this row.
+ *
+ * This call is optional (skipping it is equivalent to calling {@code addPrimaryKeyLivenessInfo(LivenessInfo.EMPTY)}).
+ *
+ * @param info the liveness information for the primary key columns of the built row.
+ */
+ public void addPrimaryKeyLivenessInfo(LivenessInfo info);
+
+ /**
+ * Adds the deletion information for this row.
+ *
+ * This call is optional and can be skipped if the row is not deleted.
+ *
+ * @param deletion the row deletion time, or {@code Deletion.LIVE} if the row isn't deleted.
+ */
+ public void addRowDeletion(Deletion deletion);
+
+ /**
+ * Adds a cell to this builder.
+ *
+ * @param cell the cell to add.
+ */
+ public void addCell(Cell cell);
+
+ /**
+ * Adds a complex deletion.
+ *
+ * @param column the column for which to add the {@code complexDeletion}.
+ * @param complexDeletion the complex deletion time to add.
+ */
+ public void addComplexDeletion(ColumnDefinition column, DeletionTime complexDeletion);
+
+ /**
+ * Builds and returns the built row.
+ *
+ * @return the last row built by this builder.
+ */
+ public Row build();
+ }
+
+ /**
+ * Utility class to help merging rows from multiple inputs (UnfilteredRowIterators).
+ */
+ public static class Merger
+ {
+ private final Row[] rows;
+ private final List<Iterator<ColumnData>> columnDataIterators;
+
+ private Clustering clustering;
+ private int rowsToMerge;
+ private int lastRowSet = -1;
+
+ private final List<ColumnData> dataBuffer = new ArrayList<>();
+ private final ColumnDataReducer columnDataReducer;
+
+ public Merger(int size, int nowInSec, boolean hasComplex)
+ {
+ this.rows = new Row[size];
+ this.columnDataIterators = new ArrayList<>(size);
+ this.columnDataReducer = new ColumnDataReducer(size, nowInSec, hasComplex);
+ }
+
+ public void clear()
+ {
+ dataBuffer.clear();
+ Arrays.fill(rows, null);
+ columnDataIterators.clear();
+ rowsToMerge = 0;
+ lastRowSet = -1;
+ }
+
+ public void add(int i, Row row)
+ {
+ clustering = row.clustering();
+ rows[i] = row;
+ ++rowsToMerge;
+ lastRowSet = i;
+ }
+
+ public Row merge(DeletionTime activeDeletion)
+ {
+ // If for this clustering we have only one row version and have no activeDeletion (i.e. nothing to filter out),
+ // then we can just return that single row
+ if (rowsToMerge == 1 && activeDeletion.isLive())
+ {
+ Row row = rows[lastRowSet];
+ assert row != null;
+ return row;
+ }
+
+ LivenessInfo rowInfo = LivenessInfo.EMPTY;
+ Deletion rowDeletion = Deletion.LIVE;
+ for (Row row : rows)
+ {
+ if (row == null)
+ continue;
+
+ if (row.primaryKeyLivenessInfo().supersedes(rowInfo))
+ rowInfo = row.primaryKeyLivenessInfo();
+ if (row.deletion().supersedes(rowDeletion))
+ rowDeletion = row.deletion();
+ }
+
+ if (rowDeletion.isShadowedBy(rowInfo))
+ rowDeletion = Deletion.LIVE;
+
+ if (rowDeletion.supersedes(activeDeletion))
+ activeDeletion = rowDeletion.time();
+ else
+ rowDeletion = Deletion.LIVE;
+
+ if (activeDeletion.deletes(rowInfo))
+ rowInfo = LivenessInfo.EMPTY;
+
+ for (Row row : rows)
+ columnDataIterators.add(row == null ? Collections.emptyIterator() : row.iterator());
+
+ columnDataReducer.setActiveDeletion(activeDeletion);
+ Iterator<ColumnData> merged = MergeIterator.get(columnDataIterators, ColumnData.comparator, columnDataReducer);
+ while (merged.hasNext())
+ {
+ ColumnData data = merged.next();
+ if (data != null)
+ dataBuffer.add(data);
+ }
+
+ // Because some data might have been shadowed by the 'activeDeletion', we could have an empty row
+ return rowInfo.isEmpty() && rowDeletion.isLive() && dataBuffer.isEmpty()
+ ? null
+ : BTreeRow.create(clustering, rowInfo, rowDeletion, BTree.build(dataBuffer, UpdateFunction.<ColumnData>noOp()));
+ }
+
+ public Clustering mergedClustering()
+ {
+ return clustering;
+ }
+
+ public Row[] mergedRows()
+ {
+ return rows;
+ }
+
+ private static class ColumnDataReducer extends MergeIterator.Reducer<ColumnData, ColumnData>
+ {
+ private final int nowInSec;
+
+ private ColumnDefinition column;
+ private final List<ColumnData> versions;
+
+ private DeletionTime activeDeletion;
+
+ private final ComplexColumnData.Builder complexBuilder;
+ private final List<Iterator<Cell>> complexCells;
+ private final CellReducer cellReducer;
+
+ public ColumnDataReducer(int size, int nowInSec, boolean hasComplex)
+ {
+ this.nowInSec = nowInSec;
+ this.versions = new ArrayList<>(size);
+ this.complexBuilder = hasComplex ? ComplexColumnData.builder() : null;
+ this.complexCells = hasComplex ? new ArrayList<>(size) : null;
+ this.cellReducer = new CellReducer(nowInSec);
+ }
+
+ public void setActiveDeletion(DeletionTime activeDeletion)
+ {
+ this.activeDeletion = activeDeletion;
+ }
+
+ public void reduce(int idx, ColumnData data)
+ {
+ column = data.column();
+ versions.add(data);
+ }
+
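+ // Merges all gathered versions of the current column: simple cells are reconciled
+ // pairwise, while complex columns merge their cells under the newest deletion in effect.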
+ protected ColumnData getReduced()
+ {
+ if (column.isSimple())
+ {
+ Cell merged = null;
+ for (ColumnData data : versions)
+ {
+ Cell cell = (Cell)data;
+ if (!activeDeletion.deletes(cell))
+ merged = merged == null ? cell : Cells.reconcile(merged, cell, nowInSec);
+ }
+ return merged;
+ }
+ else
+ {
+ complexBuilder.newColumn(column);
+ complexCells.clear();
+ DeletionTime complexDeletion = DeletionTime.LIVE;
+ for (ColumnData data : versions)
+ {
+ ComplexColumnData cd = (ComplexColumnData)data;
+ if (cd.complexDeletion().supersedes(complexDeletion))
+ complexDeletion = cd.complexDeletion();
+ complexCells.add(cd.iterator());
+ }
+
+ if (complexDeletion.supersedes(activeDeletion))
+ {
+ cellReducer.setActiveDeletion(complexDeletion);
+ complexBuilder.addComplexDeletion(complexDeletion);
+ }
+ else
+ {
+ cellReducer.setActiveDeletion(activeDeletion);
+ }
+
+ Iterator<Cell> cells = MergeIterator.get(complexCells, Cell.comparator, cellReducer);
+ while (cells.hasNext())
+ {
+ Cell merged = cells.next();
+ if (merged != null)
+ complexBuilder.addCell(merged);
+ }
+ return complexBuilder.build();
+ }
+ }
+
+ protected void onKeyChange()
+ {
+ versions.clear();
+ }
+ }
+
+ private static class CellReducer extends MergeIterator.Reducer<Cell, Cell>
+ {
+ private final int nowInSec;
+
+ private DeletionTime activeDeletion;
+ private Cell merged;
+
+ public CellReducer(int nowInSec)
+ {
+ this.nowInSec = nowInSec;
+ }
+
+ public void setActiveDeletion(DeletionTime activeDeletion)
+ {
+ this.activeDeletion = activeDeletion;
+ onKeyChange();
+ }
+
+ public void reduce(int idx, Cell cell)
+ {
+ if (!activeDeletion.deletes(cell))
+ merged = merged == null ? cell : Cells.reconcile(merged, cell, nowInSec);
+ }
+
+ protected Cell getReduced()
+ {
+ return merged;
+ }
+
+ protected void onKeyChange()
+ {
+ merged = null;
+ }
+ }
+ }
+}
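
For reference, a minimal sketch of how this Merger is meant to be driven when
merging one clustering's worth of rows from several inputs (the inputs array
and nowInSec value are illustrative stand-ins):

    // Merge up to three versions of a row; the Merger is reusable via clear().
    Row.Merger merger = new Row.Merger(3, nowInSec, true);
    for (int i = 0; i < inputs.length; i++)
        if (inputs[i] != null)
            merger.add(i, inputs[i]);
    // Pass the deletion currently in effect (DeletionTime.LIVE if none); a null
    // result means every version was shadowed by that deletion.
    Row merged = merger.merge(DeletionTime.LIVE);
    merger.clear(); // ready for the next clustering
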
http://git-wip-us.apache.org/repos/asf/cassandra/blob/aeca1d2b/src/java/org/apache/cassandra/utils/btree/BTree.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/utils/btree/BTree.java
index fe08011,1145d12..e6e6e40
--- a/src/java/org/apache/cassandra/utils/btree/BTree.java
+++ b/src/java/org/apache/cassandra/utils/btree/BTree.java
@@@ -744,301 -361,15 +744,320 @@@ public class BTree
}
};
- // return a sorted collection
- private static <V> Collection<V> sorted(Iterable<V> source, Comparator<V> comparator, int size)
+ public static <V> Builder<V> builder(Comparator<? super V> comparator)
+ {
+ return new Builder<>(comparator);
+ }
+
+ public static <V> Builder<V> builder(Comparator<? super V> comparator, int initialCapacity)
{
- V[] vs = (V[]) new Object[size];
- int i = 0;
- for (V v : source)
- vs[i++] = v;
- Arrays.sort(vs, comparator);
- return Arrays.asList(vs);
+ return new Builder<>(comparator, initialCapacity);
+ }
+
+ public static class Builder<V>
+ {
+
+ // a user-defined bulk resolution, to be applied manually via resolve()
+ public static interface Resolver
+ {
+ // can return a different output type to input, so long as sort order is maintained
+ // if a resolver is present, this method will be called for every sequence of equal inputs,
+ // even sequences containing only one item
+ Object resolve(Object[] array, int lb, int ub);
+ }
+
+ // a user-defined resolver that is applied automatically on encountering two duplicate values
+ public static interface QuickResolver<V>
+ {
+ // called automatically whenever two equal values are encountered, merging them into one;
+ // unlike Resolver, it is never invoked for a lone item
+ V resolve(V a, V b);
+ }
+
+ Comparator<? super V> comparator;
+ Object[] values;
+ int count;
+ boolean detected = true; // true if we have managed to cheaply keep the values sorted (and duplicate-free, resolving via quickResolver when present) as they were added
+ boolean auto = true; // false if the user has promised to enforce the sort order and resolve any duplicates
+ QuickResolver<V> quickResolver;
+
+ protected Builder(Comparator<? super V> comparator)
+ {
+ this(comparator, 16);
+ }
+
+ protected Builder(Comparator<? super V> comparator, int initialCapacity)
+ {
+ this.comparator = comparator;
+ this.values = new Object[initialCapacity];
+ }
+
++ private Builder(Builder<V> builder)
++ {
++ this.comparator = builder.comparator;
++ this.values = Arrays.copyOf(builder.values, builder.values.length);
++ this.count = builder.count;
++ this.detected = builder.detected;
++ this.auto = builder.auto;
++ this.quickResolver = builder.quickResolver;
++ }
++
++ /**
++ * Creates a copy of this {@code Builder}.
++ * @return a copy of this {@code Builder}.
++ */
++ public Builder<V> copy()
++ {
++ return new Builder<>(this);
++ }
++
+ public Builder<V> setQuickResolver(QuickResolver<V> quickResolver)
+ {
+ this.quickResolver = quickResolver;
+ return this;
+ }
+
+ public void reuse()
+ {
+ reuse(comparator);
+ }
+
+ public void reuse(Comparator<? super V> comparator)
+ {
+ this.comparator = comparator;
+ count = 0;
+ detected = true;
+ }
+
+ public Builder<V> auto(boolean auto)
+ {
+ this.auto = auto;
+ return this;
+ }
+
+ public Builder<V> add(V v)
+ {
+ if (count == values.length)
+ values = Arrays.copyOf(values, count * 2);
+
+ Object[] values = this.values;
+ int prevCount = this.count++;
+ values[prevCount] = v;
+
+ if (auto && detected && prevCount > 0)
+ {
+ V prev = (V) values[prevCount - 1];
+ int c = comparator.compare(prev, v);
+ if (c == 0 && auto)
+ {
+ count = prevCount;
+ if (quickResolver != null)
+ values[prevCount - 1] = quickResolver.resolve(prev, v);
+ }
+ else if (c > 0)
+ {
+ detected = false;
+ }
+ }
+
+ return this;
+ }
+
+ public Builder<V> addAll(Collection<V> add)
+ {
+ if (auto && add instanceof SortedSet && equalComparators(comparator, ((SortedSet) add).comparator()))
+ {
+ // if we're adding a SortedSet with a matching comparator, permit quick order-preserving addition;
+ // when auto is off we keep every duplicate anyway, so a merge would cost more than sorting at the end
+ return mergeAll(add, add.size());
+ }
+ detected = false;
+ if (values.length < count + add.size())
+ values = Arrays.copyOf(values, max(count + add.size(), count * 2));
+ for (V v : add)
+ values[count++] = v;
+ return this;
+ }
+
+ private static boolean equalComparators(Comparator<?> a, Comparator<?> b)
+ {
+ return a == b || (isNaturalComparator(a) && isNaturalComparator(b));
+ }
+
+ private static boolean isNaturalComparator(Comparator<?> a)
+ {
+ return a == null || a == Comparator.naturalOrder() || a == Ordering.natural();
+ }
+
+ // the provided iterable must be in sorted order!
+ private Builder<V> mergeAll(Iterable<V> add, int addCount)
+ {
+ assert auto;
+ // ensure the existing contents are in order
+ autoEnforce();
+
+ int curCount = count;
+ // we make room for curCount * 2 + addCount, so that we can copy the current values to the end
+ // if necessary for continuing the merge, and have the new values directly after the current value range
+ if (values.length < curCount * 2 + addCount)
+ values = Arrays.copyOf(values, max(curCount * 2 + addCount, curCount * 3));
+
+ if (add instanceof BTreeSet)
+ {
+ // use btree set's fast toArray method, to append directly
+ ((BTreeSet) add).toArray(values, curCount);
+ }
+ else
+ {
+ // consider calling toArray() and System.arraycopy
+ int i = curCount;
+ for (V v : add)
+ values[i++] = v;
+ }
+ return mergeAll(addCount);
+ }
+
+ private Builder<V> mergeAll(int addCount)
+ {
+ Object[] a = values;
+ int addOffset = count;
+
+ int i = 0, j = addOffset;
+ int curEnd = addOffset, addEnd = addOffset + addCount;
+
+ // save time when the new values are already a subset of the existing ones, by scanning ahead before copying anything
+ while (i < curEnd && j < addEnd)
+ {
+ V ai = (V) a[i], aj = (V) a[j];
+ // in some cases, such as Columns, we may have identity supersets, so perform a cheap object-identity check
+ int c = ai == aj ? 0 : comparator.compare(ai, aj);
+ if (c > 0)
+ break;
+ else if (c == 0)
+ {
+ if (quickResolver != null)
+ a[i] = quickResolver.resolve(ai, aj);
+ j++;
+ }
+ i++;
+ }
+
+ if (j == addEnd)
+ return this; // already a superset of the new values
+
+ // otherwise, copy the remaining existing values to the very end, freeing up space for merge result
+ int newCount = i;
+ System.arraycopy(a, i, a, addEnd, count - i);
+ curEnd = addEnd + (count - i);
+ i = addEnd;
+
+ while (i < curEnd && j < addEnd)
+ {
+ V ai = (V) a[i];
+ V aj = (V) a[j];
+ // could avoid one comparison if we cared, but would make this ugly
+ int c = comparator.compare(ai, aj);
+ if (c == 0)
+ {
+ Object newValue = quickResolver == null ? ai : quickResolver.resolve(ai, aj);
+ a[newCount++] = newValue;
+ i++;
+ j++;
+ }
+ else
+ {
+ a[newCount++] = c < 0 ? a[i++] : a[j++];
+ }
+ }
+
+ // exhausted one of the inputs; fill in remainder of the other
+ if (i < curEnd)
+ {
+ System.arraycopy(a, i, a, newCount, curEnd - i);
+ newCount += curEnd - i;
+ }
+ else if (j < addEnd)
+ {
+ if (j != newCount)
+ System.arraycopy(a, j, a, newCount, addEnd - j);
+ newCount += addEnd - j;
+ }
+ count = newCount;
+ return this;
+ }
+
+ public boolean isEmpty()
+ {
+ return count == 0;
+ }
+
+ public Builder<V> reverse()
+ {
+ assert !auto;
+ int mid = count / 2;
+ for (int i = 0 ; i < mid ; i++)
+ {
+ Object t = values[i];
+ values[i] = values[count - (1 + i)];
+ values[count - (1 + i)] = t;
+ }
+ return this;
+ }
+
+ public Builder<V> sort()
+ {
+ Arrays.sort((V[]) values, 0, count, comparator);
+ return this;
+ }
+
+ // automatically enforce sorted+filtered
+ private void autoEnforce()
+ {
+ if (!detected && count > 1)
+ {
+ sort();
+ int prevIdx = 0;
+ V prev = (V) values[0];
+ for (int i = 1 ; i < count ; i++)
+ {
+ V next = (V) values[i];
+ if (comparator.compare(prev, next) != 0)
+ values[++prevIdx] = prev = next;
+ else if (quickResolver != null)
+ values[prevIdx] = prev = quickResolver.resolve(prev, next);
+ }
+ count = prevIdx + 1;
+ }
+ detected = true;
+ }
+
+ public Builder<V> resolve(Resolver resolver)
+ {
+ if (count > 0)
+ {
+ int c = 0;
+ int prev = 0;
+ for (int i = 1 ; i < count ; i++)
+ {
+ if (comparator.compare((V) values[i], (V) values[prev]) != 0)
+ {
+ values[c++] = resolver.resolve((V[]) values, prev, i);
+ prev = i;
+ }
+ }
+ values[c++] = resolver.resolve((V[]) values, prev, count);
+ count = c;
+ }
+ return this;
+ }
+
+ public Object[] build()
+ {
+ if (auto)
+ autoEnforce();
+ return BTree.build(Arrays.asList(values).subList(0, count), UpdateFunction.noOp());
+ }
}
/** simple static wrapper around calls to cmp.compare(), checking whether a or b is Special (i.e. represents an infinity) */
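
To make the auto/resolver semantics above concrete, a small hedged sketch
(Integer values and the summing resolver are illustrative only):

    // Out-of-order and duplicate adds are fine in auto mode (the default):
    // build() sorts and deduplicates via autoEnforce().
    BTree.Builder<Integer> builder = BTree.builder(Comparator.<Integer>naturalOrder());
    builder.add(3).add(1).add(2).add(2);  // unsorted input, one duplicate
    Object[] tree = builder.build();      // btree over [1, 2, 3]

    // With a QuickResolver, equal values are merged pairwise instead of dropped;
    // note that a resolved value may merge again with the next equal input.
    Object[] summed = BTree.builder(Comparator.<Integer>naturalOrder())
                           .setQuickResolver((a, b) -> a + b)
                           .add(1).add(1).add(3)
                           .build();      // btree over [2, 3]
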
http://git-wip-us.apache.org/repos/asf/cassandra/blob/aeca1d2b/test/unit/org/apache/cassandra/cql3/validation/entities/CollectionsTest.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/aeca1d2b/test/unit/org/apache/cassandra/db/rows/RowBuilder.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/db/rows/RowBuilder.java
index b1223f1,0000000..ede2ccd
mode 100644,000000..100644
--- a/test/unit/org/apache/cassandra/db/rows/RowBuilder.java
+++ b/test/unit/org/apache/cassandra/db/rows/RowBuilder.java
@@@ -1,84 -1,0 +1,91 @@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.rows;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.cassandra.config.ColumnDefinition;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DeletionTime;
+import org.apache.cassandra.db.LivenessInfo;
++import org.apache.cassandra.db.rows.Row.Builder;
+import org.apache.cassandra.utils.Pair;
+
+/**
+ * Instrumented Row.Builder implementation for testing the
+ * behavior of the static methods in Cells and Rows.
+ */
+public class RowBuilder implements Row.Builder
+{
+ public List<Cell> cells = new LinkedList<>();
+ public Clustering clustering = null;
+ public LivenessInfo livenessInfo = null;
+ public Row.Deletion deletionTime = null;
+ public List<Pair<ColumnDefinition, DeletionTime>> complexDeletions = new LinkedList<>();
+
++ @Override
++ public Builder copy()
++ {
++ throw new UnsupportedOperationException();
++ }
++
+ public void addCell(Cell cell)
+ {
+ cells.add(cell);
+ }
+
+ public boolean isSorted()
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ public void newRow(Clustering clustering)
+ {
+ assert this.clustering == null;
+ this.clustering = clustering;
+ }
+
+ public Clustering clustering()
+ {
+ return clustering;
+ }
+
+ public void addPrimaryKeyLivenessInfo(LivenessInfo info)
+ {
+ assert livenessInfo == null;
+ livenessInfo = info;
+ }
+
+ public void addRowDeletion(Row.Deletion deletion)
+ {
+ assert deletionTime == null;
+ deletionTime = deletion;
+ }
+
+ public void addComplexDeletion(ColumnDefinition column, DeletionTime complexDeletion)
+ {
+ complexDeletions.add(Pair.create(column, complexDeletion));
+ }
+
+ public Row build()
+ {
+ throw new UnsupportedOperationException();
+ }
+}
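
A hedged sketch of how this instrumented builder might be exercised in a test
(someCell and the JUnit asserts are illustrative stand-ins):

    // Feed the builder a row's parts, then inspect its public fields to verify
    // exactly what the code under test handed over.
    RowBuilder builder = new RowBuilder();
    builder.newRow(Clustering.EMPTY);
    builder.addPrimaryKeyLivenessInfo(LivenessInfo.EMPTY);
    builder.addCell(someCell);
    assertEquals(Clustering.EMPTY, builder.clustering);
    assertEquals(LivenessInfo.EMPTY, builder.livenessInfo);
    assertEquals(1, builder.cells.size());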