You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by al...@apache.org on 2014/02/11 18:35:57 UTC
[2/3] git commit: Optimize (rewrite) ArrayBackedSortedColumns
Optimize (rewrite) ArrayBackedSortedColumns
patch by Aleksey Yeschenko; reviewed by Benedict Elliott Smith for
CASSANDRA-6662
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/fb1c6b9c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/fb1c6b9c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/fb1c6b9c
Branch: refs/heads/trunk
Commit: fb1c6b9cded56e63dfcc765515edbc94ee9f67a0
Parents: 0e43885
Author: Aleksey Yeschenko <al...@apache.org>
Authored: Tue Feb 11 20:30:57 2014 +0300
Committer: Aleksey Yeschenko <al...@apache.org>
Committed: Tue Feb 11 20:35:31 2014 +0300
----------------------------------------------------------------------
CHANGES.txt | 3 +-
.../cassandra/db/ArrayBackedSortedColumns.java | 333 +++++++++++++------
.../db/ArrayBackedSortedColumnsTest.java | 69 ++++
3 files changed, 305 insertions(+), 100 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/fb1c6b9c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7f9f9d2..d8478e9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -25,7 +25,8 @@
* CF id is changed to be non-deterministic. Data dir/key cache are created
uniquely for CF id (CASSANDRA-5202)
* New counters implementation (CASSANDRA-6504)
- * Replace UnsortedColumns usage with ArrayBackedSortedColumns (CASSANDRA-6630)
+ * Replace UnsortedColumns and TreeMapBackedSortedColumns with rewritten
+ ArrayBackedSortedColumns (CASSANDRA-6630, CASSANDRA-6662)
* Add option to use row cache with a given amount of rows (CASSANDRA-5357)
* Avoid repairing already repaired data (CASSANDRA-5351)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/fb1c6b9c/src/java/org/apache/cassandra/db/ArrayBackedSortedColumns.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ArrayBackedSortedColumns.java b/src/java/org/apache/cassandra/db/ArrayBackedSortedColumns.java
index ba082e9..a6153e4 100644
--- a/src/java/org/apache/cassandra/db/ArrayBackedSortedColumns.java
+++ b/src/java/org/apache/cassandra/db/ArrayBackedSortedColumns.java
@@ -31,16 +31,23 @@ import org.apache.cassandra.db.composites.Composite;
import org.apache.cassandra.db.filter.ColumnSlice;
/**
- * A ColumnFamily backed by an ArrayList.
+ * A ColumnFamily backed by an array.
* This implementation is not synchronized and should only be used when
* thread-safety is not required. This implementation makes sense when the
- * main operations performed are iterating over the map and adding cells
+ * main operations performed are iterating over the cells and adding cells
* (especially if insertion is in sorted order).
*/
public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
{
+ private static final int INITIAL_CAPACITY = 10;
+
+ private static final Cell[] EMPTY_ARRAY = new Cell[0];
+
private final boolean reversed;
- private final ArrayList<Cell> cells;
+
+ private Cell[] cells;
+ private int size;
+ private int sortedSize;
public static final ColumnFamily.Factory<ArrayBackedSortedColumns> factory = new Factory<ArrayBackedSortedColumns>()
{
@@ -54,14 +61,18 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
{
super(metadata);
this.reversed = reversed;
- this.cells = new ArrayList<>();
+ this.cells = new Cell[INITIAL_CAPACITY];
+ this.size = 0;
+ this.sortedSize = 0;
}
- private ArrayBackedSortedColumns(Collection<Cell> cells, CFMetaData metadata, boolean reversed)
+ private ArrayBackedSortedColumns(ArrayBackedSortedColumns original)
{
- super(metadata);
- this.reversed = reversed;
- this.cells = new ArrayList<>(cells);
+ super(original.metadata);
+ this.reversed = original.reversed;
+ this.cells = Arrays.copyOf(original.cells, original.size);
+ this.size = original.size;
+ this.sortedSize = original.sortedSize;
}
public ColumnFamily.Factory getFactory()
@@ -71,7 +82,7 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
public ColumnFamily cloneMe()
{
- return new ArrayBackedSortedColumns(cells, metadata, reversed);
+ return new ArrayBackedSortedColumns(this);
}
public boolean isInsertReversed()
@@ -84,72 +95,191 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
return reversed ? getComparator().reverseComparator() : getComparator();
}
+ private void maybeSortCells()
+ {
+ if (size != sortedSize)
+ sortCells();
+ }
+
+ private void sortCells()
+ {
+ Comparator<Cell> comparator = reversed
+ ? getComparator().columnReverseComparator()
+ : getComparator().columnComparator();
+
+ // Sort the unsorted segment - will still potentially contain duplicate (non-reconciled) cells
+ Arrays.sort(cells, sortedSize, size, comparator);
+
+ // Determine the merge start position for that segment
+ int pos = binarySearch(0, sortedSize, cells[sortedSize].name, internalComparator());
+ if (pos < 0)
+ pos = -pos - 1;
+
+ // Copy [pos, lastSortedCellIndex] cells into a separate array
+ Cell[] leftCopy = pos == sortedSize
+ ? EMPTY_ARRAY
+ : Arrays.copyOfRange(cells, pos, sortedSize);
+
+ // Store the beginning (inclusive) and the end (exclusive) indexes of the right segment
+ int rightStart = sortedSize;
+ int rightEnd = size;
+
+ // 'Trim' the size to what's left without the leftCopy
+ size = sortedSize = pos;
+
+ // Merge the cells from both segments. When adding from the left segment we can rely on it not having any
+ // duplicate cells, and thus omit the comparison with the previously entered cell - we'll never need to reconcile.
+ int l = 0, r = rightStart;
+ while (l < leftCopy.length && r < rightEnd)
+ {
+ int cmp = comparator.compare(leftCopy[l], cells[r]);
+ if (cmp < 0)
+ internalAppend(leftCopy[l++]);
+ else if (cmp == 0)
+ internalAppend(leftCopy[l++].reconcile(cells[r++]));
+ else
+ internalAppendOrReconcile(cells[r++]);
+ }
+ while (l < leftCopy.length)
+ internalAppend(leftCopy[l++]);
+ while (r < rightEnd)
+ internalAppendOrReconcile(cells[r++]);
+
+ // Nullify the remainder of the array (in case we had duplicate cells that got reconciled)
+ for (int i = size; i < rightEnd; i++)
+ cells[i] = null;
+ }
+
public Cell getColumn(CellName name)
{
+ maybeSortCells();
int pos = binarySearch(name);
- return pos >= 0 ? cells.get(pos) : null;
+ return pos >= 0 ? cells[pos] : null;
}
public void addColumn(Cell cell)
{
- if (cells.isEmpty())
+ if (size == 0)
{
- cells.add(cell);
+ internalAdd(cell);
+ sortedSize++;
return;
}
- int c = internalComparator().compare(cells.get(getColumnCount() - 1).name(), cell.name());
+ if (size != sortedSize)
+ {
+ internalAdd(cell);
+ return;
+ }
+ int c = internalComparator().compare(cells[size - 1].name(), cell.name());
if (c < 0)
{
- // Insert as last
- cells.add(cell);
+ // Append to the end
+ internalAdd(cell);
+ sortedSize++;
}
else if (c == 0)
{
- // Resolve against last
- resolveAgainst(getColumnCount() - 1, cell);
+ // Resolve against the last cell
+ reconcileWith(size - 1, cell);
}
else
{
- int pos = binarySearch(cell.name());
- if (pos >= 0)
- resolveAgainst(pos, cell);
- else
- cells.add(-pos - 1, cell);
+ // Append to the end, making cells unsorted from now on
+ internalAdd(cell);
}
}
+ public void addAll(ColumnFamily other)
+ {
+ delete(other.deletionInfo());
+
+ if (other.getColumnCount() == 0)
+ return;
+
+ Iterator<Cell> iterator = reversed ? other.reverseIterator() : other.iterator();
+ while (iterator.hasNext())
+ addColumn(iterator.next());
+ }
+
+ /**
+ * Add a cell to the array, 'resizing' it first if necessary (if it doesn't fit).
+ */
+ private void internalAdd(Cell cell)
+ {
+ // Resize the backing array if we hit the capacity
+ if (cells.length == size)
+ cells = Arrays.copyOf(cells, size * 3 / 2 + 1);
+ cells[size++] = cell;
+ }
+
+ /**
+ * Appends a cell to the array, with the knowledge that array has enough capacity for the new cell, and that
+ * the cell is being added in the sorted order, but may or may not need to be reconciled with the previously
+ * appended one.
+ */
+ private void internalAppendOrReconcile(Cell cell)
+ {
+ if (size > 0 && cells[size - 1].name().equals(cell.name()))
+ reconcileWith(size - 1, cell);
+ else
+ internalAppend(cell);
+ }
+
+ /**
+ * Appends a cell to the array, with the knowledge that array has enough capacity for the new cell, and that
+ * the cell is being added in the sorted order, and the added cell is not a duplicate of a previously inserted one.
+ */
+ private void internalAppend(Cell cell)
+ {
+ cells[size] = cell;
+ size++;
+ sortedSize++;
+ }
+
+ /**
+ * Remove the cell at a given index, shifting the rest of the array to the left if needed.
+ * Please note that we mostly remove from the end, so the shifting should be rare.
+ */
+ private void internalRemove(int index)
+ {
+ int moving = size - index - 1;
+ if (moving > 0)
+ System.arraycopy(cells, index + 1, cells, index, moving);
+ cells[--size] = null;
+ }
+
/**
- * Resolve against element at position i.
+ * Reconcile with a cell at position i.
* Assume that i is a valid position.
*/
- private void resolveAgainst(int i, Cell cell)
+ private void reconcileWith(int i, Cell cell)
{
- cells.set(i, cell.reconcile(cells.get(i)));
+ cells[i] = cell.reconcile(cells[i]);
}
private int binarySearch(CellName name)
{
- return binarySearch(cells, internalComparator(), name, 0);
+ return binarySearch(0, size, name, internalComparator());
}
/**
- * Simple binary search for a given column name.
+ * Simple binary search for a given cell name.
* The return value has the exact same meaning that the one of Collections.binarySearch().
* (We don't use Collections.binarySearch() directly because it would require us to create
* a fake Cell (as well as an Cell comparator) to do the search, which is ugly.
*/
- private static int binarySearch(List<Cell> cells, Comparator<Composite> comparator, Composite name, int start)
+ private int binarySearch(int fromIndex, int toIndex, Composite name, Comparator<Composite> comparator)
{
- int low = start;
- int mid = cells.size();
+ int low = fromIndex;
+ int mid = toIndex;
int high = mid - 1;
int result = -1;
while (low <= high)
{
mid = (low + high) >> 1;
- if ((result = comparator.compare(name, cells.get(mid).name())) > 0)
+ if ((result = comparator.compare(name, cells[mid].name())) > 0)
low = mid + 1;
else if (result == 0)
return mid;
@@ -159,78 +289,36 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
return -mid - (result < 0 ? 1 : 2);
}
- public void addAll(ColumnFamily cm)
- {
- delete(cm.deletionInfo());
- if (cm.getColumnCount() == 0)
- return;
-
- Cell[] copy = cells.toArray(new Cell[getColumnCount()]);
- int idx = 0;
- Iterator<Cell> other = reversed ? cm.reverseIterator(ColumnSlice.ALL_COLUMNS_ARRAY) : cm.iterator();
- Cell otherCell = other.next();
-
- cells.clear();
-
- while (idx < copy.length && otherCell != null)
- {
- int c = internalComparator().compare(copy[idx].name(), otherCell.name());
- if (c < 0)
- {
- cells.add(copy[idx]);
- idx++;
- }
- else if (c > 0)
- {
- cells.add(otherCell);
- otherCell = other.hasNext() ? other.next() : null;
- }
- else // c == 0
- {
- cells.add(copy[idx]);
- resolveAgainst(getColumnCount() - 1, otherCell);
- idx++;
- otherCell = other.hasNext() ? other.next() : null;
- }
- }
-
- while (idx < copy.length)
- cells.add(copy[idx++]);
-
- while (otherCell != null)
- {
- cells.add(otherCell);
- otherCell = other.hasNext() ? other.next() : null;
- }
- }
-
public Collection<Cell> getSortedColumns()
{
- return reversed ? new ReverseSortedCollection() : cells;
+ maybeSortCells();
+ return reversed ? new ReverseSortedCollection() : new ForwardSortedCollection();
}
public Collection<Cell> getReverseSortedColumns()
{
- // If reversed, the element are sorted reversely, so we could expect
- // to return *this*, but *this* redefine the iterator to be in sorted
- // order, so we need a collection that uses the super constructor
+ maybeSortCells();
return reversed ? new ForwardSortedCollection() : new ReverseSortedCollection();
}
public int getColumnCount()
{
- return cells.size();
+ maybeSortCells();
+ return size;
}
public void clear()
{
setDeletionInfo(DeletionInfo.live());
- cells.clear();
+ for (int i = 0; i < size; i++)
+ cells[i] = null;
+ size = sortedSize = 0;
}
public Iterable<CellName> getColumnNames()
{
- return Iterables.transform(cells, new Function<Cell, CellName>()
+ maybeSortCells();
+ return Iterables.transform(new ForwardSortedCollection(), new Function<Cell, CellName>()
{
public CellName apply(Cell cell)
{
@@ -241,17 +329,19 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
public Iterator<Cell> iterator(ColumnSlice[] slices)
{
- return new SlicesIterator(cells, getComparator(), slices, reversed);
+ maybeSortCells();
+ return new SlicesIterator(Arrays.asList(cells).subList(0, size), getComparator(), slices, reversed);
}
public Iterator<Cell> reverseIterator(ColumnSlice[] slices)
{
- return new SlicesIterator(cells, getComparator(), slices, !reversed);
+ maybeSortCells();
+ return new SlicesIterator(Arrays.asList(cells).subList(0, size), getComparator(), slices, !reversed);
}
private static class SlicesIterator extends AbstractIterator<Cell>
{
- private final List<Cell> list;
+ private final List<Cell> cells;
private final ColumnSlice[] slices;
private final Comparator<Composite> comparator;
@@ -259,9 +349,9 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
private int previousSliceEnd = 0;
private Iterator<Cell> currentSlice;
- public SlicesIterator(List<Cell> list, CellNameType comparator, ColumnSlice[] slices, boolean reversed)
+ public SlicesIterator(List<Cell> cells, CellNameType comparator, ColumnSlice[] slices, boolean reversed)
{
- this.list = reversed ? Lists.reverse(list) : list;
+ this.cells = reversed ? Lists.reverse(cells) : cells;
this.slices = slices;
this.comparator = reversed ? comparator.reverseComparator() : comparator;
}
@@ -275,21 +365,21 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
ColumnSlice slice = slices[idx++];
// The first idx to include
- int startIdx = slice.start.isEmpty() ? 0 : binarySearch(list, comparator, slice.start, previousSliceEnd);
+ int startIdx = slice.start.isEmpty() ? 0 : binarySearch(previousSliceEnd, slice.start);
if (startIdx < 0)
startIdx = -startIdx - 1;
// The first idx to exclude
- int finishIdx = slice.finish.isEmpty() ? list.size() - 1 : binarySearch(list, comparator, slice.finish, previousSliceEnd);
+ int finishIdx = slice.finish.isEmpty() ? cells.size() - 1 : binarySearch(previousSliceEnd, slice.finish);
if (finishIdx >= 0)
finishIdx++;
else
finishIdx = -finishIdx - 1;
- if (startIdx == 0 && finishIdx == list.size())
- currentSlice = list.iterator();
+ if (startIdx == 0 && finishIdx == cells.size())
+ currentSlice = cells.iterator();
else
- currentSlice = list.subList(startIdx, finishIdx).iterator();
+ currentSlice = cells.subList(startIdx, finishIdx).iterator();
previousSliceEnd = finishIdx > 0 ? finishIdx - 1 : 0;
}
@@ -300,20 +390,40 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
currentSlice = null;
return computeNext();
}
+
+ // Copy of ABSC.binarySearch() that takes lists
+ private int binarySearch(int fromIndex, Composite name)
+ {
+ int low = fromIndex;
+ int mid = cells.size();
+ int high = mid - 1;
+ int result = -1;
+ while (low <= high)
+ {
+ mid = (low + high) >> 1;
+ if ((result = comparator.compare(name, cells.get(mid).name())) > 0)
+ low = mid + 1;
+ else if (result == 0)
+ return mid;
+ else
+ high = mid - 1;
+ }
+ return -mid - (result < 0 ? 1 : 2);
+ }
}
private class ReverseSortedCollection extends AbstractCollection<Cell>
{
public int size()
{
- return cells.size();
+ return size;
}
public Iterator<Cell> iterator()
{
return new Iterator<Cell>()
{
- int idx = size() - 1;
+ int idx = size - 1;
boolean shouldCallNext = true;
public boolean hasNext()
@@ -324,15 +434,16 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
public Cell next()
{
shouldCallNext = false;
- return cells.get(idx--);
+ return cells[idx--];
}
public void remove()
{
if (shouldCallNext)
throw new IllegalStateException();
- cells.remove(idx + 1);
+ internalRemove(idx + 1);
shouldCallNext = true;
+ sortedSize--;
}
};
}
@@ -342,12 +453,36 @@ public class ArrayBackedSortedColumns extends AbstractThreadUnsafeSortedColumns
{
public int size()
{
- return cells.size();
+ return size;
}
public Iterator<Cell> iterator()
{
- return cells.iterator();
+ return new Iterator<Cell>()
+ {
+ int idx = 0;
+ boolean shouldCallNext = true;
+
+ public boolean hasNext()
+ {
+ return idx < size;
+ }
+
+ public Cell next()
+ {
+ shouldCallNext = false;
+ return cells[idx++];
+ }
+
+ public void remove()
+ {
+ if (shouldCallNext)
+ throw new IllegalStateException();
+ internalRemove(--idx);
+ shouldCallNext = true;
+ sortedSize--;
+ }
+ };
}
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/fb1c6b9c/test/unit/org/apache/cassandra/db/ArrayBackedSortedColumnsTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/ArrayBackedSortedColumnsTest.java b/test/unit/org/apache/cassandra/db/ArrayBackedSortedColumnsTest.java
index d98dab6..a1c98f3 100644
--- a/test/unit/org/apache/cassandra/db/ArrayBackedSortedColumnsTest.java
+++ b/test/unit/org/apache/cassandra/db/ArrayBackedSortedColumnsTest.java
@@ -63,6 +63,75 @@ public class ArrayBackedSortedColumnsTest extends SchemaLoader
}
@Test
+ public void testOutOfOrder()
+ {
+ testAddOutOfOrder(false);
+ testAddOutOfOrder(false);
+ }
+
+ private void testAddOutOfOrder(boolean reversed)
+ {
+ CellNameType type = new SimpleDenseCellNameType(Int32Type.instance);
+ ColumnFamily cells = ArrayBackedSortedColumns.factory.create(metadata(), reversed);
+
+ int[] values = new int[]{ 1, 2, 1, 3, 4, 4, 5, 5, 1, 2, 6, 6, 6, 1, 2, 3 };
+ for (int i = 0; i < values.length; ++i)
+ cells.addColumn(new Cell(type.makeCellName(values[reversed ? values.length - 1 - i : i])));
+
+ assertEquals(6, cells.getColumnCount());
+
+ Iterator<Cell> iter = cells.iterator();
+ assertEquals(1, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(2, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(3, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(4, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(5, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(6, iter.next().name().toByteBuffer().getInt(0));
+
+ // Add more values
+ values = new int[]{ 11, 15, 12, 12, 12, 16, 10, 8, 8, 7, 4, 4, 5 };
+ for (int i = 0; i < values.length; ++i)
+ cells.addColumn(new Cell(type.makeCellName(values[reversed ? values.length - 1 - i : i])));
+
+ assertEquals(13, cells.getColumnCount());
+
+ iter = cells.reverseIterator();
+ assertEquals(16, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(15, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(12, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(11, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(10, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(8, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(7, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(6, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(5, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(4, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(3, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(2, iter.next().name().toByteBuffer().getInt(0));
+ assertEquals(1, iter.next().name().toByteBuffer().getInt(0));
+ }
+
+ @Test
+ public void testGetColumn()
+ {
+ testGetColumnInternal(true);
+ testGetColumnInternal(false);
+ }
+
+ private void testGetColumnInternal(boolean reversed)
+ {
+ CellNameType type = new SimpleDenseCellNameType(Int32Type.instance);
+ ColumnFamily cells = ArrayBackedSortedColumns.factory.create(metadata(), reversed);
+
+ int[] values = new int[]{ -1, 20, 44, 55, 27, 27, 17, 1, 9, 89, 33, 44, 0, 9 };
+ for (int i = 0; i < values.length; ++i)
+ cells.addColumn(new Cell(type.makeCellName(values[reversed ? values.length - 1 - i : i])));
+
+ for (int i : values)
+ assertEquals(i, cells.getColumn(type.makeCellName(i)).name().toByteBuffer().getInt(0));
+ }
+
+ @Test
public void testAddAll()
{
testAddAllInternal(false);