You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by br...@apache.org on 2014/06/26 18:02:35 UTC

[02/13] git commit: Support DISTINCT for static columns

Support DISTINCT for static columns

This also fix the behaviour when DISTINCT is not used.

patch by slebresne; reviewed by thobbs for CASSANDRA-7305


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/6e4dca02
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/6e4dca02
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/6e4dca02

Branch: refs/heads/trunk
Commit: 6e4dca02a720ac9277370ba1d4cf387ef1a3cfd4
Parents: 77bbcc1
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Wed Jun 25 11:30:53 2014 +0200
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Thu Jun 26 10:12:58 2014 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 NEWS.txt                                        |  9 ++-
 .../cql3/statements/SelectStatement.java        | 72 +++++++++++---------
 3 files changed, 48 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 84be96d..c3fe8d7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -21,6 +21,8 @@
  * Account for range tombstones in min/max column names (CASSANDRA-7235)
  * Improve sub range repair validation (CASSANDRA-7317)
  * Accept subtypes for function results, type casts (CASSANDRA-6766)
+ * Support DISTINCT for static columns and fix behaviour when DISTINC is
+   not use (CASSANDRA-7305).
 Merged from 1.2:
  * Expose global ColumnFamily metrics (CASSANDRA-7273)
  * cqlsh: Fix CompositeType columns in DESCRIBE TABLE output (CASSANDRA-7399)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 422330c..0fbc20f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -16,11 +16,16 @@ using the provided 'sstableupgrade' tool.
 2.0.9
 =====
 
-Operations
-----------
+Upgrading
+---------
     - Default values for read_repair_chance and local_read_repair_chance have been
       swapped. Namely, default read_repair_chance is now set to 0.0, and default
       local_read_repair_chance to 0.1.
+    - Queries selecting only CQL static columns were (mistakenly) not returning one
+      result per row in the partition. This has been fixed and a SELECT DISTINCT
+      can be used when only the static column of a partition needs to be fetch
+      without fetching the whole partition. But if you use static columns, please
+      make sure this won't affect you (see CASSANDRA-7305 for details).
 
 
 2.0.8

http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 7a91517..f106402 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -427,12 +427,26 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
         }
     }
 
+    private ColumnSlice makeStaticSlice()
+    {
+        ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder();
+        // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the
+        // same effect while saving a few CPU cycles.
+        return isReversed
+             ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER)
+             : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange());
+    }
+
     private IDiskAtomFilter makeFilter(List<ByteBuffer> variables, int limit)
     throws InvalidRequestException
     {
+        int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount();
         if (parameters.isDistinct)
         {
-            return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, -1);
+            // For distinct, we only care about fetching the beginning of each partition. If we don't have
+            // static columns, we in fact only care about the first cell, so we query only that (we don't "group").
+            // If we do have static columns, we do need to fetch the first full group (to have the static columns values).
+            return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, selectsStaticColumns ? toGroup : -1);
         }
         else if (isColumnRange())
         {
@@ -440,7 +454,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
             // to account for the grouping of columns.
             // Since that doesn't work for maps/sets/lists, we now use the compositesToGroup option of SliceQueryFilter.
             // But we must preserve backward compatibility too (for mixed version cluster that is).
-            int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount();
             List<ByteBuffer> startBounds = getRequestedBound(Bound.START, variables);
             List<ByteBuffer> endBounds = getRequestedBound(Bound.END, variables);
             assert startBounds.size() == endBounds.size();
@@ -448,21 +461,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
             // Handles fetching static columns. Note that for 2i, the filter is just used to restrict
             // the part of the index to query so adding the static slice would be useless and confusing.
             // For 2i, static columns are retrieve in CompositesSearcher with each index hit.
-            ColumnSlice staticSlice = null;
-            if (selectsStaticColumns && !usesSecondaryIndexing)
-            {
-                ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder();
-                // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the
-                // same effect while saving a few CPU cycles.
-                staticSlice = isReversed
-                            ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER)
-                            : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange());
-
-                // In the case where we only select static columns, we want to really only check the static columns.
-                // So we return early as the rest of that method would actually make us query everything
-                if (selectsOnlyStaticColumns)
-                    return sliceFilter(staticSlice, limit, toGroup);
-            }
+            ColumnSlice staticSlice = selectsStaticColumns && !usesSecondaryIndexing
+                                    ? makeStaticSlice()
+                                    : null;
 
             // The case where startBounds == 1 is common enough that it's worth optimizing
             if (startBounds.size() == 1)
@@ -1088,7 +1089,7 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
                                    ? ((CompositeType)cfDef.cfm.getKeyValidator()).split(key)
                                    : new ByteBuffer[]{ key };
 
-        if (parameters.isDistinct)
+        if (parameters.isDistinct && !selectsStaticColumns)
         {
             if (!cf.hasOnlyTombstones(now))
             {
@@ -1331,6 +1332,23 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
         return false;
     }
 
+    private void validateDistinctSelection()
+    throws InvalidRequestException
+    {
+        Collection<CFDefinition.Name> requestedColumns = selection.getColumns();
+        for (CFDefinition.Name name : requestedColumns)
+            if (name.kind != CFDefinition.Name.Kind.KEY_ALIAS && name.kind != CFDefinition.Name.Kind.STATIC)
+                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)", name));
+
+        // If it's a key range, we require that all partition key columns are selected so we don't have to bother with post-query grouping.
+        if (!isKeyRange)
+            return;
+
+        for (CFDefinition.Name name : cfDef.partitionKeys())
+            if (!requestedColumns.contains(name))
+                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name));
+    }
+
     public static class RawStatement extends CFStatement
     {
         private final Parameters parameters;
@@ -1363,9 +1381,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
                                 ? Selection.wildcard(cfDef)
                                 : Selection.fromSelectors(cfDef, selectClause);
 
-            if (parameters.isDistinct)
-                validateDistinctSelection(selection.getColumns(), cfDef.partitionKeys());
-
             SelectStatement stmt = new SelectStatement(cfDef, boundNames.size(), parameters, selection, prepareLimit(boundNames));
 
             /*
@@ -1440,6 +1455,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
 
             checkNeedsFiltering(stmt);
 
+            if (parameters.isDistinct)
+                stmt.validateDistinctSelection();
+
             return new ParsedStatement.Prepared(stmt, boundNames);
         }
 
@@ -1961,18 +1979,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
             }
         }
 
-        private void validateDistinctSelection(Collection<CFDefinition.Name> requestedColumns, Collection<CFDefinition.Name> partitionKey)
-        throws InvalidRequestException
-        {
-            for (CFDefinition.Name name : requestedColumns)
-                if (!partitionKey.contains(name))
-                    throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns (not %s)", name));
-
-            for (CFDefinition.Name name : partitionKey)
-                if (!requestedColumns.contains(name))
-                    throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name));
-        }
-
         private boolean containsAlias(final ColumnIdentifier name)
         {
             return Iterables.any(selectClause, new Predicate<RawSelector>()