You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2014/06/25 16:01:50 UTC

[1/2] git commit: Support DISTINCT for static columns

Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 1569bd9c4 -> d27cd394d


Support DISTINCT for static columns

This also fix the behaviour when DISTINCT is not used.

patch by slebresne; reviewed by thobbs for CASSANDRA-7305


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/6ebee66c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/6ebee66c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/6ebee66c

Branch: refs/heads/cassandra-2.1
Commit: 6ebee66c6e828f823a13a06cf0344ff8c272ff4f
Parents: 94ff639
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Wed Jun 25 11:30:53 2014 +0200
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Wed Jun 25 11:30:53 2014 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 NEWS.txt                                        |  9 ++-
 .../cql3/statements/SelectStatement.java        | 72 +++++++++++---------
 3 files changed, 48 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/6ebee66c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index d23ce37..7ec2501 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,8 @@
  * Account for range tombstones in min/max column names (CASSANDRA-7235)
  * Improve sub range repair validation (CASSANDRA-7317)
  * Accept subtypes for function results, type casts (CASSANDRA-6766)
+ * Support DISTINCT for static columns and fix behaviour when DISTINC is
+   not use (CASSANDRA-7305).
 Merged from 1.2:
  * Expose global ColumnFamily metrics (CASSANDRA-7273)
  * Handle possible integer overflow in FastByteArrayOutputStream (CASSANDRA-7373)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/6ebee66c/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 422330c..0fbc20f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -16,11 +16,16 @@ using the provided 'sstableupgrade' tool.
 2.0.9
 =====
 
-Operations
-----------
+Upgrading
+---------
     - Default values for read_repair_chance and local_read_repair_chance have been
       swapped. Namely, default read_repair_chance is now set to 0.0, and default
       local_read_repair_chance to 0.1.
+    - Queries selecting only CQL static columns were (mistakenly) not returning one
+      result per row in the partition. This has been fixed and a SELECT DISTINCT
+      can be used when only the static column of a partition needs to be fetch
+      without fetching the whole partition. But if you use static columns, please
+      make sure this won't affect you (see CASSANDRA-7305 for details).
 
 
 2.0.8

http://git-wip-us.apache.org/repos/asf/cassandra/blob/6ebee66c/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 7a91517..f106402 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -427,12 +427,26 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
         }
     }
 
+    private ColumnSlice makeStaticSlice()
+    {
+        ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder();
+        // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the
+        // same effect while saving a few CPU cycles.
+        return isReversed
+             ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER)
+             : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange());
+    }
+
     private IDiskAtomFilter makeFilter(List<ByteBuffer> variables, int limit)
     throws InvalidRequestException
     {
+        int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount();
         if (parameters.isDistinct)
         {
-            return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, -1);
+            // For distinct, we only care about fetching the beginning of each partition. If we don't have
+            // static columns, we in fact only care about the first cell, so we query only that (we don't "group").
+            // If we do have static columns, we do need to fetch the first full group (to have the static columns values).
+            return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, selectsStaticColumns ? toGroup : -1);
         }
         else if (isColumnRange())
         {
@@ -440,7 +454,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
             // to account for the grouping of columns.
             // Since that doesn't work for maps/sets/lists, we now use the compositesToGroup option of SliceQueryFilter.
             // But we must preserve backward compatibility too (for mixed version cluster that is).
-            int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount();
             List<ByteBuffer> startBounds = getRequestedBound(Bound.START, variables);
             List<ByteBuffer> endBounds = getRequestedBound(Bound.END, variables);
             assert startBounds.size() == endBounds.size();
@@ -448,21 +461,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
             // Handles fetching static columns. Note that for 2i, the filter is just used to restrict
             // the part of the index to query so adding the static slice would be useless and confusing.
             // For 2i, static columns are retrieve in CompositesSearcher with each index hit.
-            ColumnSlice staticSlice = null;
-            if (selectsStaticColumns && !usesSecondaryIndexing)
-            {
-                ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder();
-                // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the
-                // same effect while saving a few CPU cycles.
-                staticSlice = isReversed
-                            ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER)
-                            : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange());
-
-                // In the case where we only select static columns, we want to really only check the static columns.
-                // So we return early as the rest of that method would actually make us query everything
-                if (selectsOnlyStaticColumns)
-                    return sliceFilter(staticSlice, limit, toGroup);
-            }
+            ColumnSlice staticSlice = selectsStaticColumns && !usesSecondaryIndexing
+                                    ? makeStaticSlice()
+                                    : null;
 
             // The case where startBounds == 1 is common enough that it's worth optimizing
             if (startBounds.size() == 1)
@@ -1088,7 +1089,7 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
                                    ? ((CompositeType)cfDef.cfm.getKeyValidator()).split(key)
                                    : new ByteBuffer[]{ key };
 
-        if (parameters.isDistinct)
+        if (parameters.isDistinct && !selectsStaticColumns)
         {
             if (!cf.hasOnlyTombstones(now))
             {
@@ -1331,6 +1332,23 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
         return false;
     }
 
+    private void validateDistinctSelection()
+    throws InvalidRequestException
+    {
+        Collection<CFDefinition.Name> requestedColumns = selection.getColumns();
+        for (CFDefinition.Name name : requestedColumns)
+            if (name.kind != CFDefinition.Name.Kind.KEY_ALIAS && name.kind != CFDefinition.Name.Kind.STATIC)
+                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)", name));
+
+        // If it's a key range, we require that all partition key columns are selected so we don't have to bother with post-query grouping.
+        if (!isKeyRange)
+            return;
+
+        for (CFDefinition.Name name : cfDef.partitionKeys())
+            if (!requestedColumns.contains(name))
+                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name));
+    }
+
     public static class RawStatement extends CFStatement
     {
         private final Parameters parameters;
@@ -1363,9 +1381,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
                                 ? Selection.wildcard(cfDef)
                                 : Selection.fromSelectors(cfDef, selectClause);
 
-            if (parameters.isDistinct)
-                validateDistinctSelection(selection.getColumns(), cfDef.partitionKeys());
-
             SelectStatement stmt = new SelectStatement(cfDef, boundNames.size(), parameters, selection, prepareLimit(boundNames));
 
             /*
@@ -1440,6 +1455,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
 
             checkNeedsFiltering(stmt);
 
+            if (parameters.isDistinct)
+                stmt.validateDistinctSelection();
+
             return new ParsedStatement.Prepared(stmt, boundNames);
         }
 
@@ -1961,18 +1979,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
             }
         }
 
-        private void validateDistinctSelection(Collection<CFDefinition.Name> requestedColumns, Collection<CFDefinition.Name> partitionKey)
-        throws InvalidRequestException
-        {
-            for (CFDefinition.Name name : requestedColumns)
-                if (!partitionKey.contains(name))
-                    throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns (not %s)", name));
-
-            for (CFDefinition.Name name : partitionKey)
-                if (!requestedColumns.contains(name))
-                    throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name));
-        }
-
         private boolean containsAlias(final ColumnIdentifier name)
         {
             return Iterables.any(selectClause, new Predicate<RawSelector>()


[2/2] git commit: Merge branch 'cassandra-2.0' into cassandra-2.1

Posted by sl...@apache.org.
Merge branch 'cassandra-2.0' into cassandra-2.1

Conflicts:
	src/java/org/apache/cassandra/cql3/statements/SelectStatement.java


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/d27cd394
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/d27cd394
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/d27cd394

Branch: refs/heads/cassandra-2.1
Commit: d27cd394d8db5b713b159cf7b0bf5b40a9d2572c
Parents: 1569bd9 6ebee66
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Wed Jun 25 16:01:26 2014 +0200
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Wed Jun 25 16:01:26 2014 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |  6 ++
 NEWS.txt                                        |  9 ++-
 .../cql3/statements/SelectStatement.java        | 68 +++++++++++---------
 3 files changed, 50 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/d27cd394/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 4b97875,7ec2501..0c92663
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,31 -1,22 +1,37 @@@
 -2.0.9
 - * Fix assertion error in CL.ANY timeout handling (CASSANDRA-7364)
 - * Handle empty CFs in Memtable#maybeUpdateLiveRatio() (CASSANDRA-7401)
++2.1.0
++Merged from 2.0:
++ * Support DISTINCT for static columns and fix behaviour when DISTINC is
++   not use (CASSANDRA-7305).
++
++
 +2.1.0-rc2
 + * Fix heap size calculation for CompoundSparseCellName and 
 +   CompoundSparseCellName.WithCollection (CASSANDRA-7421)
 + * Allow counter mutations in UNLOGGED batches (CASSANDRA-7351)
 + * Modify reconcile logic to always pick a tombstone over a counter cell
 +   (CASSANDRA-7346)
 + * Avoid incremental compaction on Windows (CASSANDRA-7365)
 + * Fix exception when querying a composite-keyed table with a collection index
 +   (CASSANDRA-7372)
 + * Use node's host id in place of counter ids (CASSANDRA-7366)
   * Fix native protocol CAS batches (CASSANDRA-7337)
 + * Reduce likelihood of contention on local paxos locking (CASSANDRA-7359)
 + * Upgrade to Pig 0.12.1 (CASSANDRA-6556)
 + * Make sure we clear out repair sessions from netstats (CASSANDRA-7329)
 + * Don't fail streams on failure detector downs (CASSANDRA-3569)
 + * Add optional keyspace to DROP INDEX statement (CASSANDRA-7314)
 + * Reduce run time for CQL tests (CASSANDRA-7327)
 + * Fix heap size calculation on Windows (CASSANDRA-7352, 7353)
 + * RefCount native frames from netty (CASSANDRA-7245)
 + * Use tarball dir instead of /var for default paths (CASSANDRA-7136)
 + * Remove rows_per_partition_to_cache keyword (CASSANDRA-7193)
 + * Fix schema change response in native protocol v3 (CASSANDRA-7413)
 +Merged from 2.0:
 + * Fix assertion error in CL.ANY timeout handling (CASSANDRA-7364)
   * Add per-CF range read request latency metrics (CASSANDRA-7338)
   * Fix NPE in StreamTransferTask.createMessageForRetry() (CASSANDRA-7323)
 - * Add conditional CREATE/DROP USER support (CASSANDRA-7264)
 - * Swap local and global default read repair chances (CASSANDRA-7320)
 - * Add missing iso8601 patterns for date strings (CASSANDRA-6973)
 - * Support selecting multiple rows in a partition using IN (CASSANDRA-6875)
 - * cqlsh: always emphasize the partition key in DESC output (CASSANDRA-7274)
 - * Copy compaction options to make sure they are reloaded (CASSANDRA-7290)
 - * Add option to do more aggressive tombstone compactions (CASSANDRA-6563)
 - * Don't try to compact already-compacting files in HHOM (CASSANDRA-7288)
 - * Add authentication support to shuffle (CASSANDRA-6484)
 - * Cqlsh counts non-empty lines for "Blank lines" warning (CASSANDRA-7325)
   * Make StreamSession#closeSession() idempotent (CASSANDRA-7262)
   * Fix infinite loop on exception while streaming (CASSANDRA-7330)
 - * Reference sstables before populating key cache (CASSANDRA-7234)
   * Account for range tombstones in min/max column names (CASSANDRA-7235)
   * Improve sub range repair validation (CASSANDRA-7317)
   * Accept subtypes for function results, type casts (CASSANDRA-6766)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d27cd394/NEWS.txt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d27cd394/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 51390a3,f106402..1e1b03f
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@@ -423,18 -427,35 +423,30 @@@ public class SelectStatement implement
          }
      }
  
+     private ColumnSlice makeStaticSlice()
+     {
 -        ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder();
 -        // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the
++        // Note: we could use staticPrefix.start() for the start bound, but EMPTY gives us the
+         // same effect while saving a few CPU cycles.
+         return isReversed
 -             ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER)
 -             : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange());
++             ? new ColumnSlice(cfm.comparator.staticPrefix().end(), Composites.EMPTY)
++             : new ColumnSlice(Composites.EMPTY, cfm.comparator.staticPrefix().end());
+     }
+ 
 -    private IDiskAtomFilter makeFilter(List<ByteBuffer> variables, int limit)
 +    private IDiskAtomFilter makeFilter(QueryOptions options, int limit)
      throws InvalidRequestException
      {
 -        int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount();
++        int toGroup = cfm.comparator.isDense() ? -1 : cfm.clusteringColumns().size();
          if (parameters.isDistinct)
          {
-             return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, -1);
+             // For distinct, we only care about fetching the beginning of each partition. If we don't have
+             // static columns, we in fact only care about the first cell, so we query only that (we don't "group").
+             // If we do have static columns, we do need to fetch the first full group (to have the static columns values).
+             return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, selectsStaticColumns ? toGroup : -1);
          }
          else if (isColumnRange())
          {
-             int toGroup = cfm.comparator.isDense() ? -1 : cfm.clusteringColumns().size();
 -            // For sparse, we used to ask for 'defined columns' * 'asked limit' (where defined columns includes the row marker)
 -            // to account for the grouping of columns.
 -            // Since that doesn't work for maps/sets/lists, we now use the compositesToGroup option of SliceQueryFilter.
 -            // But we must preserve backward compatibility too (for mixed version cluster that is).
 -            List<ByteBuffer> startBounds = getRequestedBound(Bound.START, variables);
 -            List<ByteBuffer> endBounds = getRequestedBound(Bound.END, variables);
 +            List<Composite> startBounds = getRequestedBound(Bound.START, options);
 +            List<Composite> endBounds = getRequestedBound(Bound.END, options);
              assert startBounds.size() == endBounds.size();
  
              // Handles fetching static columns. Note that for 2i, the filter is just used to restrict
@@@ -1311,6 -1332,23 +1312,23 @@@
          return false;
      }
  
+     private void validateDistinctSelection()
+     throws InvalidRequestException
+     {
 -        Collection<CFDefinition.Name> requestedColumns = selection.getColumns();
 -        for (CFDefinition.Name name : requestedColumns)
 -            if (name.kind != CFDefinition.Name.Kind.KEY_ALIAS && name.kind != CFDefinition.Name.Kind.STATIC)
 -                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)", name));
++        Collection<ColumnDefinition> requestedColumns = selection.getColumns();
++        for (ColumnDefinition def : requestedColumns)
++            if (def.kind != ColumnDefinition.Kind.PARTITION_KEY && def.kind != ColumnDefinition.Kind.STATIC)
++                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)", def.name));
+ 
+         // If it's a key range, we require that all partition key columns are selected so we don't have to bother with post-query grouping.
+         if (!isKeyRange)
+             return;
+ 
 -        for (CFDefinition.Name name : cfDef.partitionKeys())
 -            if (!requestedColumns.contains(name))
 -                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name));
++        for (ColumnDefinition def : cfm.partitionKeyColumns())
++            if (!requestedColumns.contains(def))
++                throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", def.name));
+     }
+ 
      public static class RawStatement extends CFStatement
      {
          private final Parameters parameters;
@@@ -1337,13 -1378,10 +1355,10 @@@
                  throw new InvalidRequestException("Only COUNT(*) and COUNT(1) operations are currently supported.");
  
              Selection selection = selectClause.isEmpty()
 -                                ? Selection.wildcard(cfDef)
 -                                : Selection.fromSelectors(cfDef, selectClause);
 +                                ? Selection.wildcard(cfm)
 +                                : Selection.fromSelectors(cfm, selectClause);
  
-             if (parameters.isDistinct)
-                 validateDistinctSelection(selection.getColumns(), cfm.partitionKeyColumns());
- 
 -            SelectStatement stmt = new SelectStatement(cfDef, boundNames.size(), parameters, selection, prepareLimit(boundNames));
 +            SelectStatement stmt = new SelectStatement(cfm, boundNames.size(), parameters, selection, prepareLimit(boundNames));
  
              /*
               * WHERE clause. For a given entity, rules are:
@@@ -1934,34 -1979,6 +1952,22 @@@
              }
          }
  
 +        private int indexOf(ColumnDefinition def, Selection selection)
 +        {
 +            return indexOf(def, selection.getColumns().iterator());
 +        }
 +
 +        private int indexOf(final ColumnDefinition def, Iterator<ColumnDefinition> defs)
 +        {
 +            return Iterators.indexOf(defs, new Predicate<ColumnDefinition>()
 +                                           {
 +                                               public boolean apply(ColumnDefinition n)
 +                                               {
 +                                                   return def.name.equals(n.name);
 +                                               }
 +                                           });
 +        }
 +
-         private void validateDistinctSelection(Collection<ColumnDefinition> requestedColumns, Collection<ColumnDefinition> partitionKey)
-         throws InvalidRequestException
-         {
-             for (ColumnDefinition def : requestedColumns)
-                 if (!partitionKey.contains(def))
-                     throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns (not %s)", def.name));
- 
-             for (ColumnDefinition def : partitionKey)
-                 if (!requestedColumns.contains(def))
-                     throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", def.name));
-         }
- 
          private boolean containsAlias(final ColumnIdentifier name)
          {
              return Iterables.any(selectClause, new Predicate<RawSelector>()