You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text version.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2010/07/25 06:16:31 UTC

svn commit: r978978 - in /cassandra/trunk: src/java/org/apache/cassandra/db/ColumnFamilyStore.java src/java/org/apache/cassandra/thrift/ThriftValidation.java test/system/test_thrift_server.py

Author: jbellis
Date: Sun Jul 25 04:16:30 2010
New Revision: 978978

URL: http://svn.apache.org/viewvc?rev=978978&view=rev
Log:
implement multiple index expressions.  patch by jbellis; reviewed by Nate McCall for CASSANDRA-1157

Modified:
    cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
    cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java
    cassandra/trunk/test/system/test_thrift_server.py

Modified: cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=978978&r1=978977&r2=978978&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Sun Jul 25 04:16:30 2010
@@ -281,15 +281,24 @@ public class ColumnFamilyStore implement
         long count = 0;
         for (SSTableReader sstable : ssTables_)
         {
-            if (sstable.getEstimatedRowSize().median() > 0)
-            {
-                sum += sstable.getEstimatedRowSize().median();
-                count++;
-            }
+            sum += sstable.getEstimatedRowSize().median();
+            count++;
         }
         return count > 0 ? sum / count : 0;
     }
 
+    public int getMeanColumns()
+    {
+        long sum = 0;
+        int count = 0;
+        for (SSTableReader sstable : ssTables_)
+        {
+            sum += sstable.getEstimatedColumnCount().median();
+            count++;
+        }
+        return count > 0 ? (int) (sum / count) : 0;
+    }
+
     public static ColumnFamilyStore createColumnFamilyStore(String table, String columnFamily)
     {
         return createColumnFamilyStore(table, columnFamily, StorageService.getPartitioner(), DatabaseDescriptor.getCFMetaData(table, columnFamily));
@@ -1053,9 +1062,8 @@ public class ColumnFamilyStore implement
 
     public List<Row> scan(IndexClause indexClause, IFilter dataFilter)
     {
-        // TODO: use statistics to pick clause w/ highest selectivity
-        // TODO even later: allow merge join instead of just one index + loop
-        IndexExpression first = indexClause.expressions.get(0);
+        // TODO: allow merge join instead of just one index + loop
+        IndexExpression first = highestSelectivityPredicate(indexClause);
         ColumnFamilyStore indexCFS = getIndexedColumnFamilyStore(first.column_name);
         assert indexCFS != null;
         DecoratedKey indexKey = indexCFS.partitioner_.decorateKey(first.value);
@@ -1076,14 +1084,48 @@ public class ColumnFamilyStore implement
         {
             DecoratedKey dk = partitioner_.decorateKey(dataKey);
             ColumnFamily data = getColumnFamily(new QueryFilter(dk, new QueryPath(columnFamily_), dataFilter));
-            rows.add(new Row(dk, data));
+            boolean accepted = true;
+            for (IndexExpression expression : indexClause.expressions)
+            {
+                // (we can skip "first" since we already know it's satisfied)
+                if (expression != first && !satisfies(data, expression))
+                {
+                    accepted = false;
+                    break;
+                }
+            }
+            if (accepted)
+                rows.add(new Row(dk, data));
         }
 
-        // TODO apply remaining expressions
-
         return rows;
     }
 
+    private IndexExpression highestSelectivityPredicate(IndexClause clause)
+    {
+        IndexExpression best = null;
+        int bestMeanCount = Integer.MAX_VALUE;
+        for (IndexExpression expression : clause.expressions)
+        {
+            ColumnFamilyStore cfs = getIndexedColumnFamilyStore(expression.column_name);
+            if (cfs == null)
+                continue;
+            int columns = cfs.getMeanColumns();
+            if (columns < bestMeanCount)
+            {
+                best = expression;
+                bestMeanCount = columns;
+            }
+        }
+        return best;
+    }
+
+    private static boolean satisfies(ColumnFamily data, IndexExpression expression)
+    {
+        IColumn column = data.getColumn(expression.column_name);
+        return column != null && Arrays.equals(column.value(), expression.value);
+    }
+
     public AbstractType getComparator()
     {
         return metadata.comparator;

Modified: cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java?rev=978978&r1=978977&r2=978978&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/thrift/ThriftValidation.java Sun Jul 25 04:16:30 2010
@@ -380,8 +380,9 @@ public class ThriftValidation
         Set<byte[]> indexedColumns = Table.open(keyspace).getColumnFamilyStore(columnFamily).getIndexedColumns();
         for (IndexExpression expression : index_clause.expressions)
         {
-            if (!indexedColumns.contains(expression.column_name))
-                throw new InvalidRequestException("Unable to scan unindexed column");
+            if (indexedColumns.contains(expression.column_name))
+                return;
         }
+        throw new InvalidRequestException("No indexed columns present in index clause");
     }
 }

Modified: cassandra/trunk/test/system/test_thrift_server.py
URL: http://svn.apache.org/viewvc/cassandra/trunk/test/system/test_thrift_server.py?rev=978978&r1=978977&r2=978978&view=diff
==============================================================================
--- cassandra/trunk/test/system/test_thrift_server.py (original)
+++ cassandra/trunk/test/system/test_thrift_server.py Sun Jul 25 04:16:30 2010
@@ -1233,19 +1233,31 @@ class TestMutations(ThriftTester):
         _set_keyspace('Keyspace1')
         client.insert('key1', ColumnParent('Indexed1'), Column('birthdate', _i64(1), Clock(0)), ConsistencyLevel.ONE)
         client.insert('key2', ColumnParent('Indexed1'), Column('birthdate', _i64(2), Clock(0)), ConsistencyLevel.ONE)
+        client.insert('key2', ColumnParent('Indexed1'), Column('b', _i64(2), Clock(0)), ConsistencyLevel.ONE)
+        client.insert('key3', ColumnParent('Indexed1'), Column('birthdate', _i64(3), Clock(0)), ConsistencyLevel.ONE)
         client.insert('key3', ColumnParent('Indexed1'), Column('b', _i64(3), Clock(0)), ConsistencyLevel.ONE)
 
+        # simple query on one index expression
         cp = ColumnParent('Indexed1')
-        expr = IndexExpression('birthdate', IndexOperator.EQ, _i64(1))
-        rp = RowPredicate(index_clause=IndexClause([expr]))
         sp = SlicePredicate(slice_range=SliceRange('', ''))
-        result = client.scan(cp, rp, sp, ConsistencyLevel.ONE)
+        clause = IndexClause([IndexExpression('birthdate', IndexOperator.EQ, _i64(1))])
+        result = client.scan(cp, RowPredicate(index_clause=clause), sp, ConsistencyLevel.ONE)
         assert len(result) == 1, result
         assert result[0].key == 'key1'
         assert len(result[0].columns) == 1, result[0].columns
 
-        expr.column_name = 'b'
-        _expect_exception(lambda: client.scan(cp, rp, sp, ConsistencyLevel.ONE), InvalidRequestException)
+        # solo unindexed expression is invalid
+        clause = IndexClause([IndexExpression('b', IndexOperator.EQ, _i64(1))])
+        _expect_exception(lambda: client.scan(cp, RowPredicate(index_clause=clause), sp, ConsistencyLevel.ONE), InvalidRequestException)
+
+        # but unindexed expression added to indexed one is ok
+        clause = IndexClause([IndexExpression('b', IndexOperator.EQ, _i64(3)),
+                              IndexExpression('birthdate', IndexOperator.EQ, _i64(3))])
+        result = client.scan(cp, RowPredicate(index_clause=clause), sp, ConsistencyLevel.ONE)
+        assert len(result) == 1, result
+        assert result[0].key == 'key3'
+        assert len(result[0].columns) == 2, result[0].columns
+        
         
 
 class TestTruncate(ThriftTester):