You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2010/07/25 06:16:31 UTC
svn commit: r978978 - in /cassandra/trunk:
src/java/org/apache/cassandra/db/ColumnFamilyStore.java
src/java/org/apache/cassandra/thrift/ThriftValidation.java
test/system/test_thrift_server.py
Author: jbellis
Date: Sun Jul 25 04:16:30 2010
New Revision: 978978
URL: http://svn.apache.org/viewvc?rev=978978&view=rev
Log:
implement multiple index expressions. patch by jbellis; reviewed by Nate McCall for CASSANDRA-1157
Modified:
cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java
cassandra/trunk/test/system/test_thrift_server.py
Modified: cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=978978&r1=978977&r2=978978&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Sun Jul 25 04:16:30 2010
@@ -281,15 +281,24 @@ public class ColumnFamilyStore implement
long count = 0;
for (SSTableReader sstable : ssTables_)
{
- if (sstable.getEstimatedRowSize().median() > 0)
- {
- sum += sstable.getEstimatedRowSize().median();
- count++;
- }
+ sum += sstable.getEstimatedRowSize().median();
+ count++;
}
return count > 0 ? sum / count : 0;
}
+ public int getMeanColumns()
+ {
+ long sum = 0;
+ int count = 0;
+ for (SSTableReader sstable : ssTables_)
+ {
+ sum += sstable.getEstimatedColumnCount().median();
+ count++;
+ }
+ return count > 0 ? (int) (sum / count) : 0;
+ }
+
public static ColumnFamilyStore createColumnFamilyStore(String table, String columnFamily)
{
return createColumnFamilyStore(table, columnFamily, StorageService.getPartitioner(), DatabaseDescriptor.getCFMetaData(table, columnFamily));
@@ -1053,9 +1062,8 @@ public class ColumnFamilyStore implement
public List<Row> scan(IndexClause indexClause, IFilter dataFilter)
{
- // TODO: use statistics to pick clause w/ highest selectivity
- // TODO even later: allow merge join instead of just one index + loop
- IndexExpression first = indexClause.expressions.get(0);
+ // TODO: allow merge join instead of just one index + loop
+ IndexExpression first = highestSelectivityPredicate(indexClause);
ColumnFamilyStore indexCFS = getIndexedColumnFamilyStore(first.column_name);
assert indexCFS != null;
DecoratedKey indexKey = indexCFS.partitioner_.decorateKey(first.value);
@@ -1076,14 +1084,48 @@ public class ColumnFamilyStore implement
{
DecoratedKey dk = partitioner_.decorateKey(dataKey);
ColumnFamily data = getColumnFamily(new QueryFilter(dk, new QueryPath(columnFamily_), dataFilter));
- rows.add(new Row(dk, data));
+ boolean accepted = true;
+ for (IndexExpression expression : indexClause.expressions)
+ {
+ // (we can skip "first" since we already know it's satisfied)
+ if (expression != first && !satisfies(data, expression))
+ {
+ accepted = false;
+ break;
+ }
+ }
+ if (accepted)
+ rows.add(new Row(dk, data));
}
- // TODO apply remaining expressions
-
return rows;
}
+ private IndexExpression highestSelectivityPredicate(IndexClause clause)
+ {
+ IndexExpression best = null;
+ int bestMeanCount = Integer.MAX_VALUE;
+ for (IndexExpression expression : clause.expressions)
+ {
+ ColumnFamilyStore cfs = getIndexedColumnFamilyStore(expression.column_name);
+ if (cfs == null)
+ continue;
+ int columns = cfs.getMeanColumns();
+ if (columns < bestMeanCount)
+ {
+ best = expression;
+ bestMeanCount = columns;
+ }
+ }
+ return best;
+ }
+
+ private static boolean satisfies(ColumnFamily data, IndexExpression expression)
+ {
+ IColumn column = data.getColumn(expression.column_name);
+ return column != null && Arrays.equals(column.value(), expression.value);
+ }
+
public AbstractType getComparator()
{
return metadata.comparator;
Modified: cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java?rev=978978&r1=978977&r2=978978&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java Sun Jul 25 04:16:30 2010
@@ -380,8 +380,9 @@ public class ThriftValidation
Set<byte[]> indexedColumns = Table.open(keyspace).getColumnFamilyStore(columnFamily).getIndexedColumns();
for (IndexExpression expression : index_clause.expressions)
{
- if (!indexedColumns.contains(expression.column_name))
- throw new InvalidRequestException("Unable to scan unindexed column");
+ if (indexedColumns.contains(expression.column_name))
+ return;
}
+ throw new InvalidRequestException("No indexed columns present in index clause");
}
}
Modified: cassandra/trunk/test/system/test_thrift_server.py
URL: http://svn.apache.org/viewvc/cassandra/trunk/test/system/test_thrift_server.py?rev=978978&r1=978977&r2=978978&view=diff
==============================================================================
--- cassandra/trunk/test/system/test_thrift_server.py (original)
+++ cassandra/trunk/test/system/test_thrift_server.py Sun Jul 25 04:16:30 2010
@@ -1233,19 +1233,31 @@ class TestMutations(ThriftTester):
_set_keyspace('Keyspace1')
client.insert('key1', ColumnParent('Indexed1'), Column('birthdate', _i64(1), Clock(0)), ConsistencyLevel.ONE)
client.insert('key2', ColumnParent('Indexed1'), Column('birthdate', _i64(2), Clock(0)), ConsistencyLevel.ONE)
+ client.insert('key2', ColumnParent('Indexed1'), Column('b', _i64(2), Clock(0)), ConsistencyLevel.ONE)
+ client.insert('key3', ColumnParent('Indexed1'), Column('birthdate', _i64(3), Clock(0)), ConsistencyLevel.ONE)
client.insert('key3', ColumnParent('Indexed1'), Column('b', _i64(3), Clock(0)), ConsistencyLevel.ONE)
+ # simple query on one index expression
cp = ColumnParent('Indexed1')
- expr = IndexExpression('birthdate', IndexOperator.EQ, _i64(1))
- rp = RowPredicate(index_clause=IndexClause([expr]))
sp = SlicePredicate(slice_range=SliceRange('', ''))
- result = client.scan(cp, rp, sp, ConsistencyLevel.ONE)
+ clause = IndexClause([IndexExpression('birthdate', IndexOperator.EQ, _i64(1))])
+ result = client.scan(cp, RowPredicate(index_clause=clause), sp, ConsistencyLevel.ONE)
assert len(result) == 1, result
assert result[0].key == 'key1'
assert len(result[0].columns) == 1, result[0].columns
- expr.column_name = 'b'
- _expect_exception(lambda: client.scan(cp, rp, sp, ConsistencyLevel.ONE), InvalidRequestException)
+ # solo unindexed expression is invalid
+ clause = IndexClause([IndexExpression('b', IndexOperator.EQ, _i64(1))])
+ _expect_exception(lambda: client.scan(cp, RowPredicate(index_clause=clause), sp, ConsistencyLevel.ONE), InvalidRequestException)
+
+ # but unindexed expression added to indexed one is ok
+ clause = IndexClause([IndexExpression('b', IndexOperator.EQ, _i64(3)),
+ IndexExpression('birthdate', IndexOperator.EQ, _i64(3))])
+ result = client.scan(cp, RowPredicate(index_clause=clause), sp, ConsistencyLevel.ONE)
+ assert len(result) == 1, result
+ assert result[0].key == 'key3'
+ assert len(result[0].columns) == 2, result[0].columns
+
class TestTruncate(ThriftTester):