You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ty...@apache.org on 2014/07/17 00:19:50 UTC

git commit: Handle queries on multiple secondary index types

Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1.0 545e2e1cb -> 2a661195f


Handle queries on multiple secondary index types

Patch and review by Sam Tunnicliffe and Tyler Hobbs for CASSANDRA-7525


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2a661195
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2a661195
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2a661195

Branch: refs/heads/cassandra-2.1.0
Commit: 2a661195fe0fb5fb43b0295266d0decf40820442
Parents: 545e2e1
Author: Tyler Hobbs <ty...@datastax.com>
Authored: Wed Jul 16 17:18:46 2014 -0500
Committer: Tyler Hobbs <ty...@datastax.com>
Committed: Wed Jul 16 17:18:46 2014 -0500

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../db/composites/BoundedComposite.java         |  3 ++
 .../cassandra/db/filter/ExtendedFilter.java     | 22 +++++++++---
 .../db/index/SecondaryIndexManager.java         | 34 +++++++++---------
 .../db/index/SecondaryIndexSearcher.java        | 15 ++++++--
 .../CompositesIndexOnCollectionValue.java       |  9 ++++-
 .../apache/cassandra/service/StorageProxy.java  | 17 +++++----
 .../cassandra/cql3/ContainsRelationTest.java    | 38 ++++++++++++++++++++
 8 files changed, 108 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index a7f0cee..8864cb1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@
  * Bump CQL version to 3.2.0 and update CQL documentation (CASSANDRA-7527)
  * Fix configuration error message when running nodetool ring (CASSANDRA-7508)
  * Support conditional updates, tuple type, and the v3 protocol in cqlsh (CASSANDRA-7509)
+ * Handle queries on multiple secondary index types (CASSANDRA-7525)
 Merged from 2.0:
  * (Windows) force range-based repair to non-sequential mode (CASSANDRA-7541)
  * Fix range merging when DES scores are zero (CASSANDRA-7535)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/src/java/org/apache/cassandra/db/composites/BoundedComposite.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/composites/BoundedComposite.java b/src/java/org/apache/cassandra/db/composites/BoundedComposite.java
index 7654edc..7f596fe 100644
--- a/src/java/org/apache/cassandra/db/composites/BoundedComposite.java
+++ b/src/java/org/apache/cassandra/db/composites/BoundedComposite.java
@@ -23,6 +23,9 @@ import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.utils.memory.AbstractAllocator;
 import org.apache.cassandra.utils.ObjectSizes;
 
+/**
+ * Wraps another Composite and adds an EOC byte to track whether this is a slice start or end.
+ */
 public class BoundedComposite extends AbstractComposite
 {
     private static final long EMPTY_SIZE = ObjectSizes.measure(new BoundedComposite(null, false));

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/src/java/org/apache/cassandra/db/filter/ExtendedFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/filter/ExtendedFilter.java b/src/java/org/apache/cassandra/db/filter/ExtendedFilter.java
index 5e410bc..bc59152 100644
--- a/src/java/org/apache/cassandra/db/filter/ExtendedFilter.java
+++ b/src/java/org/apache/cassandra/db/filter/ExtendedFilter.java
@@ -18,13 +18,12 @@
 package org.apache.cassandra.db.filter;
 
 import java.nio.ByteBuffer;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.SortedSet;
-import java.util.TreeSet;
+import java.util.*;
 
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterators;
 import org.apache.cassandra.db.marshal.CollectionType;
+import org.apache.cassandra.utils.ByteBufferUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -329,6 +328,19 @@ public abstract class ExtendedFilter
         {
             assert def.type.isCollection();
 
+            if (expr.operator == IndexExpression.Operator.CONTAINS)
+            {
+                // get a slice of the collection cells
+                Iterator<Cell> iter = data.iterator(new ColumnSlice[]{ data.getComparator().create(prefix, def).slice() });
+                while (iter.hasNext())
+                {
+                    if (((CollectionType) def.type).valueComparator().compare(iter.next().value(), expr.value) == 0)
+                        return true;
+                }
+
+                return false;
+            }
+
             CollectionType type = (CollectionType)def.type;
             switch (type.kind)
             {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/src/java/org/apache/cassandra/db/index/SecondaryIndexManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/index/SecondaryIndexManager.java b/src/java/org/apache/cassandra/db/index/SecondaryIndexManager.java
index d32306a..edb9126 100644
--- a/src/java/org/apache/cassandra/db/index/SecondaryIndexManager.java
+++ b/src/java/org/apache/cassandra/db/index/SecondaryIndexManager.java
@@ -213,24 +213,18 @@ public class SecondaryIndexManager
     }
 
     /**
-     * @return true if the indexes can handle the clause.
+     * @return true if at least one of the indexes can handle the clause.
      */
     public boolean hasIndexFor(List<IndexExpression> clause)
     {
         if (clause == null || clause.isEmpty())
             return false;
 
-        // It doesn't seem a clause can have multiple searchers, but since
-        // getIndexSearchersForQuery returns a list ...
-        List<SecondaryIndexSearcher> searchers = getIndexSearchersForQuery(clause);
-        if (searchers.isEmpty())
-            return false;
-
-        for (SecondaryIndexSearcher searcher : searchers)
-            if (!searcher.isIndexing(clause))
-                return false;
+        for (SecondaryIndexSearcher searcher : getIndexSearchersForQuery(clause))
+            if (searcher.canHandleIndexClause(clause))
+                return true;
 
-        return true;
+        return false;
     }
 
     /**
@@ -570,7 +564,6 @@ public class SecondaryIndexManager
 
     /**
      * Performs a search across a number of column indexes
-     * TODO: add support for querying across index types
      *
      * @param filter the column range to restrict to
      * @return found indexed rows
@@ -582,11 +575,20 @@ public class SecondaryIndexManager
         if (indexSearchers.isEmpty())
             return Collections.emptyList();
 
-        //We currently don't support searching across multiple index types
-        if (indexSearchers.size() > 1)
-            throw new RuntimeException("Unable to search across multiple secondary index types");
+        SecondaryIndexSearcher mostSelective = null;
+        long bestEstimate = Long.MAX_VALUE;
+        for (SecondaryIndexSearcher searcher : indexSearchers)
+        {
+            SecondaryIndex highestSelectivityIndex = searcher.highestSelectivityIndex(filter.getClause());
+            long estimate = highestSelectivityIndex.estimateResultRows();
+            if (estimate <= bestEstimate)
+            {
+                bestEstimate = estimate;
+                mostSelective = searcher;
+            }
+        }
 
-        return indexSearchers.get(0).search(filter);
+        return mostSelective.search(filter);
     }
 
     public Set<SecondaryIndex> getIndexesByNames(Set<String> idxNames)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/src/java/org/apache/cassandra/db/index/SecondaryIndexSearcher.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/index/SecondaryIndexSearcher.java b/src/java/org/apache/cassandra/db/index/SecondaryIndexSearcher.java
index 70c3d6b..395708a 100644
--- a/src/java/org/apache/cassandra/db/index/SecondaryIndexSearcher.java
+++ b/src/java/org/apache/cassandra/db/index/SecondaryIndexSearcher.java
@@ -47,11 +47,20 @@ public abstract class SecondaryIndexSearcher
     public abstract List<Row> search(ExtendedFilter filter);
 
     /**
-     * @return true this index is able to handle given clauses.
+     * @return true this index is able to handle the given index expressions.
      */
-    public boolean isIndexing(List<IndexExpression> clause)
+    public boolean canHandleIndexClause(List<IndexExpression> clause)
     {
-        return highestSelectivityPredicate(clause) != null;
+        for (IndexExpression expression : clause)
+        {
+            if (!columns.contains(expression.column) || !expression.operator.allowsIndexQuery())
+                continue;
+
+            SecondaryIndex index = indexManager.getIndexForColumn(expression.column);
+            if (index != null && index.getIndexCfs() != null)
+                return true;
+        }
+        return false;
     }
 
     protected IndexExpression highestSelectivityPredicate(List<IndexExpression> clause)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/src/java/org/apache/cassandra/db/index/composites/CompositesIndexOnCollectionValue.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/index/composites/CompositesIndexOnCollectionValue.java b/src/java/org/apache/cassandra/db/index/composites/CompositesIndexOnCollectionValue.java
index 63589f4..7a8c552 100644
--- a/src/java/org/apache/cassandra/db/index/composites/CompositesIndexOnCollectionValue.java
+++ b/src/java/org/apache/cassandra/db/index/composites/CompositesIndexOnCollectionValue.java
@@ -73,7 +73,14 @@ public class CompositesIndexOnCollectionValue extends CompositesIndex
         builder.add(rowKey);
         for (int i = 0; i < Math.min(columnDef.position(), cellName.size()); i++)
             builder.add(cellName.get(i));
-        builder.add(cellName.get(columnDef.position() + 1));
+
+        // When indexing, cellName is a full name including the collection
+        // key. When searching, restricted clustering columns are included
+        // but the collection key is not. In this case, don't try to add an
+        // element to the builder for it, as it will just end up null and
+        // error out when retrieving cells from the index cf (CASSANDRA-7525)
+        if (cellName.size() >= columnDef.position() + 1)
+            builder.add(cellName.get(columnDef.position() + 1));
         return builder.build();
     }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/src/java/org/apache/cassandra/service/StorageProxy.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/StorageProxy.java b/src/java/org/apache/cassandra/service/StorageProxy.java
index 988c623..9eae17a 100644
--- a/src/java/org/apache/cassandra/service/StorageProxy.java
+++ b/src/java/org/apache/cassandra/service/StorageProxy.java
@@ -1450,14 +1450,19 @@ public class StorageProxy implements StorageProxyMBean
     private static float estimateResultRowsPerRange(AbstractRangeCommand command, Keyspace keyspace)
     {
         ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(command.columnFamily);
-        float resultRowsPerRange;
+        float resultRowsPerRange = Float.POSITIVE_INFINITY;
         if (command.rowFilter != null && !command.rowFilter.isEmpty())
         {
-            // secondary index query (cql3 or otherwise)
-            SecondaryIndexSearcher searcher = Iterables.getOnlyElement(cfs.indexManager.getIndexSearchersForQuery(command.rowFilter));
-            SecondaryIndex highestSelectivityIndex = searcher.highestSelectivityIndex(command.rowFilter);
-            // use our own mean column count as our estimate for how many matching rows each node will have
-            resultRowsPerRange = highestSelectivityIndex.estimateResultRows();
+            List<SecondaryIndexSearcher> searchers = cfs.indexManager.getIndexSearchersForQuery(command.rowFilter);
+            assert !searchers.isEmpty() : "Got row filter with no matching SecondaryIndexSearchers";
+
+            // Secondary index query (cql3 or otherwise).  Estimate result rows based on most selective 2ary index.
+            for (SecondaryIndexSearcher searcher : searchers)
+            {
+                // use our own mean column count as our estimate for how many matching rows each node will have
+                SecondaryIndex highestSelectivityIndex = searcher.highestSelectivityIndex(command.rowFilter);
+                resultRowsPerRange = Math.min(resultRowsPerRange, highestSelectivityIndex.estimateResultRows());
+            }
         }
         else if (!command.countCQL3Rows())
         {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2a661195/test/unit/org/apache/cassandra/cql3/ContainsRelationTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/ContainsRelationTest.java b/test/unit/org/apache/cassandra/cql3/ContainsRelationTest.java
index 0c6cfe1..84a8c8f 100644
--- a/test/unit/org/apache/cassandra/cql3/ContainsRelationTest.java
+++ b/test/unit/org/apache/cassandra/cql3/ContainsRelationTest.java
@@ -38,6 +38,10 @@ public class ContainsRelationTest extends CQLTester
         assertRows(execute("SELECT * FROM %s WHERE account = ? AND categories CONTAINS ?", "test", "lmn"),
             row("test", 5, set("lmn"))
         );
+
+        assertRows(execute("SELECT * FROM %s WHERE account = ? AND id = ? AND categories CONTAINS ?", "test", 5, "lmn"),
+                   row("test", 5, set("lmn"))
+        );
     }
 
     @Test
@@ -57,6 +61,10 @@ public class ContainsRelationTest extends CQLTester
         assertRows(execute("SELECT * FROM %s WHERE categories CONTAINS ?", "lmn"),
             row("test", 5, list("lmn"))
         );
+
+        assertRows(execute("SELECT * FROM %s WHERE account = ? AND id = ? AND categories CONTAINS ?;", "test", 5, "lmn"),
+                   row("test", 5, list("lmn"))
+        );
     }
 
     @Test
@@ -75,6 +83,10 @@ public class ContainsRelationTest extends CQLTester
         assertRows(execute("SELECT * FROM %s WHERE categories CONTAINS KEY ?", "lmn"),
             row("test", 5, map("lmn", "foo"))
         );
+
+        assertRows(execute("SELECT * FROM %s WHERE account = ? AND id = ? AND categories CONTAINS KEY ?", "test", 5, "lmn"),
+                   row("test", 5, map("lmn", "foo"))
+        );
     }
 
     @Test
@@ -94,5 +106,31 @@ public class ContainsRelationTest extends CQLTester
         assertRows(execute("SELECT * FROM %s WHERE categories CONTAINS ?", "foo"),
             row("test", 5, map("lmn", "foo"))
         );
+
+        assertRows(execute("SELECT * FROM %s WHERE account = ? AND id = ? AND categories CONTAINS ?", "test", 5, "foo"),
+                   row("test", 5, map("lmn", "foo"))
+        );
+    }
+
+    // See CASSANDRA-7525
+    @Test
+    public void testQueryMultipleIndexTypes() throws Throwable
+    {
+        createTable("CREATE TABLE %s (account text, id int, categories map<text,text>, PRIMARY KEY (account, id))");
+
+        // create an index on
+        createIndex("CREATE INDEX id_index ON %s(id)");
+        createIndex("CREATE INDEX categories_values_index ON %s(categories)");
+
+        execute("INSERT INTO %s (account, id , categories) VALUES (?, ?, ?)", "test", 5, map("lmn", "foo"));
+
+        assertRows(execute("SELECT * FROM %s WHERE categories CONTAINS ? AND id = ? ALLOW FILTERING", "foo", 5),
+                row("test", 5, map("lmn", "foo"))
+        );
+
+        assertRows(
+            execute("SELECT * FROM %s WHERE account = ? AND categories CONTAINS ? AND id = ? ALLOW FILTERING", "test", "foo", 5),
+            row("test", 5, map("lmn", "foo"))
+        );
     }
 }