You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2013/09/01 04:36:16 UTC
[1/7] git commit: Optimize name query performance in wide rows
patch by rbranson; reviewed by jbellis for CASSANDRA-5966
Updated Branches:
refs/heads/cassandra-2.0 83a43f121 -> 7375c03aa
refs/heads/trunk 0ebfa3fbb -> 7a0618c27
Optimize name query performance in wide rows
patch by rbranson; reviewed by jbellis for CASSANDRA-5966
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/b14273b4
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/b14273b4
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/b14273b4
Branch: refs/heads/cassandra-2.0
Commit: b14273b4352ac814946d8e1b9f9be479ddae8d0b
Parents: 1a50215
Author: Jonathan Ellis <jb...@apache.org>
Authored: Sat Aug 31 20:35:19 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Sat Aug 31 20:35:19 2013 -0500
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../db/columniterator/SSTableNamesIterator.java | 37 ++++++++++++++++----
2 files changed, 31 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/b14273b4/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 777d0d1..1943217 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
1.2.10
+ * Optimize name query performance in wide rows (CASSANDRA-5966)
* Upgrade metrics-core to version 2.2.0 (CASSANDRA-5947)
* Add snitch, schema version, cluster, partitioner to JMX (CASSANDRA-5881)
* Fix CqlRecordWriter with composite keys (CASSANDRA-5949)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/b14273b4/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
index df28c46..40934d4 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
@@ -153,7 +153,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
List<OnDiskAtom> result = new ArrayList<OnDiskAtom>();
if (indexList.isEmpty())
{
- readSimpleColumns(file, columns, result);
+ readSimpleColumns(sstable.metadata, file, columns, result);
}
else
{
@@ -175,27 +175,37 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
iter = result.iterator();
}
- private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<OnDiskAtom> result) throws IOException
+ private void readSimpleColumns(CFMetaData metadata,
+ FileDataInput file,
+ SortedSet<ByteBuffer> columnNames,
+ List<OnDiskAtom> result)
+ throws IOException
{
+ AbstractType<?> comparator = metadata.comparator;
OnDiskAtom.Serializer atomSerializer = cf.getOnDiskSerializer();
+ ByteBuffer maximalColumnName = columnNames.last();
int count = file.readInt();
- int n = 0;
+
for (int i = 0; i < count; i++)
{
OnDiskAtom column = atomSerializer.deserializeFromSSTable(file, sstable.descriptor.version);
+ ByteBuffer columnName = column.name();
+
if (column instanceof IColumn)
{
- if (columnNames.contains(column.name()))
+ if (columnNames.contains(columnName))
{
result.add(column);
- if (++n >= columns.size())
- break;
}
}
else
{
result.add(column);
}
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
@@ -231,6 +241,12 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
{
long positionToSeek = basePosition + indexInfo.offset;
+ // SortedSet.subSet() is end-exclusive, so we special-case that
+ // if it's one of the columns we're looking for
+ ByteBuffer maximalColumnName = columnNames.contains(indexInfo.lastName)
+ ? indexInfo.lastName
+ : columnNames.subSet(indexInfo.firstName, indexInfo.lastName).last();
+
// With new promoted indexes, our first seek in the data file will happen at that point.
if (file == null)
file = createFileDataInput(positionToSeek);
@@ -238,13 +254,20 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
OnDiskAtom.Serializer atomSerializer = cf.getOnDiskSerializer();
file.seek(positionToSeek);
FileMark mark = file.mark();
+
// TODO only completely deserialize columns we are interested in
while (file.bytesPastMark(mark) < indexInfo.width)
{
OnDiskAtom column = atomSerializer.deserializeFromSSTable(file, sstable.descriptor.version);
+ ByteBuffer columnName = column.name();
+
// we check vs the original Set, not the filtered List, for efficiency
- if (!(column instanceof IColumn) || columnNames.contains(column.name()))
+ if (!(column instanceof IColumn) || columnNames.contains(columnName))
result.add(column);
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
}
[2/7] git commit: Optimize name query performance in wide rows
patch by rbranson; reviewed by jbellis for CASSANDRA-5966
Posted by jb...@apache.org.
Optimize name query performance in wide rows
patch by rbranson; reviewed by jbellis for CASSANDRA-5966
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/b14273b4
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/b14273b4
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/b14273b4
Branch: refs/heads/trunk
Commit: b14273b4352ac814946d8e1b9f9be479ddae8d0b
Parents: 1a50215
Author: Jonathan Ellis <jb...@apache.org>
Authored: Sat Aug 31 20:35:19 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Sat Aug 31 20:35:19 2013 -0500
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../db/columniterator/SSTableNamesIterator.java | 37 ++++++++++++++++----
2 files changed, 31 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/b14273b4/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 777d0d1..1943217 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
1.2.10
+ * Optimize name query performance in wide rows (CASSANDRA-5966)
* Upgrade metrics-core to version 2.2.0 (CASSANDRA-5947)
* Add snitch, schema version, cluster, partitioner to JMX (CASSANDRA-5881)
* Fix CqlRecordWriter with composite keys (CASSANDRA-5949)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/b14273b4/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
index df28c46..40934d4 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
@@ -153,7 +153,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
List<OnDiskAtom> result = new ArrayList<OnDiskAtom>();
if (indexList.isEmpty())
{
- readSimpleColumns(file, columns, result);
+ readSimpleColumns(sstable.metadata, file, columns, result);
}
else
{
@@ -175,27 +175,37 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
iter = result.iterator();
}
- private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<OnDiskAtom> result) throws IOException
+ private void readSimpleColumns(CFMetaData metadata,
+ FileDataInput file,
+ SortedSet<ByteBuffer> columnNames,
+ List<OnDiskAtom> result)
+ throws IOException
{
+ AbstractType<?> comparator = metadata.comparator;
OnDiskAtom.Serializer atomSerializer = cf.getOnDiskSerializer();
+ ByteBuffer maximalColumnName = columnNames.last();
int count = file.readInt();
- int n = 0;
+
for (int i = 0; i < count; i++)
{
OnDiskAtom column = atomSerializer.deserializeFromSSTable(file, sstable.descriptor.version);
+ ByteBuffer columnName = column.name();
+
if (column instanceof IColumn)
{
- if (columnNames.contains(column.name()))
+ if (columnNames.contains(columnName))
{
result.add(column);
- if (++n >= columns.size())
- break;
}
}
else
{
result.add(column);
}
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
@@ -231,6 +241,12 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
{
long positionToSeek = basePosition + indexInfo.offset;
+ // SortedSet.subSet() is end-exclusive, so we special-case that
+ // if it's one of the columns we're looking for
+ ByteBuffer maximalColumnName = columnNames.contains(indexInfo.lastName)
+ ? indexInfo.lastName
+ : columnNames.subSet(indexInfo.firstName, indexInfo.lastName).last();
+
// With new promoted indexes, our first seek in the data file will happen at that point.
if (file == null)
file = createFileDataInput(positionToSeek);
@@ -238,13 +254,20 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement
OnDiskAtom.Serializer atomSerializer = cf.getOnDiskSerializer();
file.seek(positionToSeek);
FileMark mark = file.mark();
+
// TODO only completely deserialize columns we are interested in
while (file.bytesPastMark(mark) < indexInfo.width)
{
OnDiskAtom column = atomSerializer.deserializeFromSSTable(file, sstable.descriptor.version);
+ ByteBuffer columnName = column.name();
+
// we check vs the original Set, not the filtered List, for efficiency
- if (!(column instanceof IColumn) || columnNames.contains(column.name()))
+ if (!(column instanceof IColumn) || columnNames.contains(columnName))
result.add(column);
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
}
[5/7] git commit: merge from 1.2
Posted by jb...@apache.org.
merge from 1.2
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7375c03a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7375c03a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7375c03a
Branch: refs/heads/trunk
Commit: 7375c03aa6b6b8c97b9485e8a94b26a9ca1ee465
Parents: bd1a703
Author: Jonathan Ellis <jb...@apache.org>
Authored: Sat Aug 31 21:36:03 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Sat Aug 31 21:36:03 2013 -0500
----------------------------------------------------------------------
.../db/columniterator/SSTableNamesIterator.java | 35 ++++++++++++++++----
1 file changed, 28 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7375c03a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
index 3467244..3e8c0a0 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
@@ -141,7 +141,7 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
if (indexList.isEmpty())
{
int columnCount = sstable.descriptor.version.hasRowSizeAndColumnCount ? file.readInt() : Integer.MAX_VALUE;
- readSimpleColumns(file, columns, result, columnCount);
+ readSimpleColumns(sstable.metadata, file, columns, result, columnCount);
}
else
{
@@ -152,26 +152,35 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
iter = result.iterator();
}
- private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<OnDiskAtom> result, int columnCount)
+ private void readSimpleColumns(CFMetaData metadata,
+ FileDataInput file,
+ SortedSet<ByteBuffer> columnNames,
+ List<OnDiskAtom> result,
+ int columnCount)
+ throws IOException
{
+ AbstractType<?> comparator = metadata.comparator;
Iterator<OnDiskAtom> atomIterator = cf.metadata().getOnDiskIterator(file, columnCount, sstable.descriptor.version);
- int n = 0;
+ ByteBuffer maximalColumnName = columnNames.last();
while (atomIterator.hasNext())
{
OnDiskAtom column = atomIterator.next();
+ ByteBuffer columnName = column.name();
if (column instanceof Column)
{
- if (columnNames.contains(column.name()))
+ if (columnNames.contains(columnName))
{
result.add(column);
- if (++n >= columns.size())
- break;
}
}
else
{
result.add(column);
}
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
@@ -207,6 +216,12 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
{
long positionToSeek = basePosition + indexInfo.offset;
+ // SortedSet.subSet() is end-exclusive, so we special-case that
+ // if it's one of the columns we're looking for
+ ByteBuffer maximalColumnName = columnNames.contains(indexInfo.lastName)
+ ? indexInfo.lastName
+ : columnNames.subSet(indexInfo.firstName, indexInfo.lastName).last();
+
// With new promoted indexes, our first seek in the data file will happen at that point.
if (file == null)
file = createFileDataInput(positionToSeek);
@@ -215,13 +230,19 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
Iterator<OnDiskAtom> atomIterator = cf.metadata().getOnDiskIterator(file, Integer.MAX_VALUE, sstable.descriptor.version);
file.seek(positionToSeek);
FileMark mark = file.mark();
+
// TODO only completely deserialize columns we are interested in
while (file.bytesPastMark(mark) < indexInfo.width)
{
OnDiskAtom column = atomIterator.next();
+ ByteBuffer columnName = column.name();
// we check vs the original Set, not the filtered List, for efficiency
- if (!(column instanceof Column) || columnNames.contains(column.name()))
+ if (!(column instanceof Column) || columnNames.contains(columnName))
result.add(column);
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
}
[4/7] git commit: Merge branch 'cassandra-1.2' into cassandra-2.0
Posted by jb...@apache.org.
Merge branch 'cassandra-1.2' into cassandra-2.0
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/bd1a703d
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/bd1a703d
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/bd1a703d
Branch: refs/heads/cassandra-2.0
Commit: bd1a703d90ffdc0970628cc3b373ee7cb9a87137
Parents: 83a43f1 b14273b
Author: Jonathan Ellis <jb...@apache.org>
Authored: Sat Aug 31 21:35:34 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Sat Aug 31 21:35:34 2013 -0500
----------------------------------------------------------------------
----------------------------------------------------------------------
[7/7] git commit: Merge branch 'cassandra-2.0' into trunk
Posted by jb...@apache.org.
Merge branch 'cassandra-2.0' into trunk
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7a0618c2
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7a0618c2
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7a0618c2
Branch: refs/heads/trunk
Commit: 7a0618c276ab1eb980b49e6a965013fe8fddd6c7
Parents: 0ebfa3f 7375c03
Author: Jonathan Ellis <jb...@apache.org>
Authored: Sat Aug 31 21:36:10 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Sat Aug 31 21:36:10 2013 -0500
----------------------------------------------------------------------
.../db/columniterator/SSTableNamesIterator.java | 35 ++++++++++++++++----
1 file changed, 28 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
[3/7] git commit: Merge branch 'cassandra-1.2' into cassandra-2.0
Posted by jb...@apache.org.
Merge branch 'cassandra-1.2' into cassandra-2.0
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/bd1a703d
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/bd1a703d
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/bd1a703d
Branch: refs/heads/trunk
Commit: bd1a703d90ffdc0970628cc3b373ee7cb9a87137
Parents: 83a43f1 b14273b
Author: Jonathan Ellis <jb...@apache.org>
Authored: Sat Aug 31 21:35:34 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Sat Aug 31 21:35:34 2013 -0500
----------------------------------------------------------------------
----------------------------------------------------------------------
[6/7] git commit: merge from 1.2
Posted by jb...@apache.org.
merge from 1.2
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7375c03a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7375c03a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7375c03a
Branch: refs/heads/cassandra-2.0
Commit: 7375c03aa6b6b8c97b9485e8a94b26a9ca1ee465
Parents: bd1a703
Author: Jonathan Ellis <jb...@apache.org>
Authored: Sat Aug 31 21:36:03 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Sat Aug 31 21:36:03 2013 -0500
----------------------------------------------------------------------
.../db/columniterator/SSTableNamesIterator.java | 35 ++++++++++++++++----
1 file changed, 28 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/7375c03a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
index 3467244..3e8c0a0 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java
@@ -141,7 +141,7 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
if (indexList.isEmpty())
{
int columnCount = sstable.descriptor.version.hasRowSizeAndColumnCount ? file.readInt() : Integer.MAX_VALUE;
- readSimpleColumns(file, columns, result, columnCount);
+ readSimpleColumns(sstable.metadata, file, columns, result, columnCount);
}
else
{
@@ -152,26 +152,35 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
iter = result.iterator();
}
- private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<OnDiskAtom> result, int columnCount)
+ private void readSimpleColumns(CFMetaData metadata,
+ FileDataInput file,
+ SortedSet<ByteBuffer> columnNames,
+ List<OnDiskAtom> result,
+ int columnCount)
+ throws IOException
{
+ AbstractType<?> comparator = metadata.comparator;
Iterator<OnDiskAtom> atomIterator = cf.metadata().getOnDiskIterator(file, columnCount, sstable.descriptor.version);
- int n = 0;
+ ByteBuffer maximalColumnName = columnNames.last();
while (atomIterator.hasNext())
{
OnDiskAtom column = atomIterator.next();
+ ByteBuffer columnName = column.name();
if (column instanceof Column)
{
- if (columnNames.contains(column.name()))
+ if (columnNames.contains(columnName))
{
result.add(column);
- if (++n >= columns.size())
- break;
}
}
else
{
result.add(column);
}
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
@@ -207,6 +216,12 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
{
long positionToSeek = basePosition + indexInfo.offset;
+ // SortedSet.subSet() is end-exclusive, so we special-case that
+ // if it's one of the columns we're looking for
+ ByteBuffer maximalColumnName = columnNames.contains(indexInfo.lastName)
+ ? indexInfo.lastName
+ : columnNames.subSet(indexInfo.firstName, indexInfo.lastName).last();
+
// With new promoted indexes, our first seek in the data file will happen at that point.
if (file == null)
file = createFileDataInput(positionToSeek);
@@ -215,13 +230,19 @@ public class SSTableNamesIterator extends AbstractIterator<OnDiskAtom> implement
Iterator<OnDiskAtom> atomIterator = cf.metadata().getOnDiskIterator(file, Integer.MAX_VALUE, sstable.descriptor.version);
file.seek(positionToSeek);
FileMark mark = file.mark();
+
// TODO only completely deserialize columns we are interested in
while (file.bytesPastMark(mark) < indexInfo.width)
{
OnDiskAtom column = atomIterator.next();
+ ByteBuffer columnName = column.name();
// we check vs the original Set, not the filtered List, for efficiency
- if (!(column instanceof Column) || columnNames.contains(column.name()))
+ if (!(column instanceof Column) || columnNames.contains(columnName))
result.add(column);
+
+ // Already consumed all of this block that's going to have columns that apply to this query.
+ if (comparator.compare(columnName, maximalColumnName) >= 0)
+ break;
}
}
}