You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2014/06/26 18:01:46 UTC
[2/7] git commit: Support DISTINCT for static columns
Support DISTINCT for static columns
This also fix the behaviour when DISTINCT is not used.
patch by slebresne; reviewed by thobbs for CASSANDRA-7305
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/6e4dca02
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/6e4dca02
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/6e4dca02
Branch: refs/heads/cassandra-2.1
Commit: 6e4dca02a720ac9277370ba1d4cf387ef1a3cfd4
Parents: 77bbcc1
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Wed Jun 25 11:30:53 2014 +0200
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Thu Jun 26 10:12:58 2014 +0200
----------------------------------------------------------------------
CHANGES.txt | 2 +
NEWS.txt | 9 ++-
.../cql3/statements/SelectStatement.java | 72 +++++++++++---------
3 files changed, 48 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 84be96d..c3fe8d7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -21,6 +21,8 @@
* Account for range tombstones in min/max column names (CASSANDRA-7235)
* Improve sub range repair validation (CASSANDRA-7317)
* Accept subtypes for function results, type casts (CASSANDRA-6766)
+ * Support DISTINCT for static columns and fix behaviour when DISTINC is
+ not use (CASSANDRA-7305).
Merged from 1.2:
* Expose global ColumnFamily metrics (CASSANDRA-7273)
* cqlsh: Fix CompositeType columns in DESCRIBE TABLE output (CASSANDRA-7399)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 422330c..0fbc20f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -16,11 +16,16 @@ using the provided 'sstableupgrade' tool.
2.0.9
=====
-Operations
-----------
+Upgrading
+---------
- Default values for read_repair_chance and local_read_repair_chance have been
swapped. Namely, default read_repair_chance is now set to 0.0, and default
local_read_repair_chance to 0.1.
+ - Queries selecting only CQL static columns were (mistakenly) not returning one
+ result per row in the partition. This has been fixed and a SELECT DISTINCT
+ can be used when only the static column of a partition needs to be fetch
+ without fetching the whole partition. But if you use static columns, please
+ make sure this won't affect you (see CASSANDRA-7305 for details).
2.0.8
http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 7a91517..f106402 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -427,12 +427,26 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
}
}
+ private ColumnSlice makeStaticSlice()
+ {
+ ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder();
+ // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the
+ // same effect while saving a few CPU cycles.
+ return isReversed
+ ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER)
+ : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange());
+ }
+
private IDiskAtomFilter makeFilter(List<ByteBuffer> variables, int limit)
throws InvalidRequestException
{
+ int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount();
if (parameters.isDistinct)
{
- return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, -1);
+ // For distinct, we only care about fetching the beginning of each partition. If we don't have
+ // static columns, we in fact only care about the first cell, so we query only that (we don't "group").
+ // If we do have static columns, we do need to fetch the first full group (to have the static columns values).
+ return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, selectsStaticColumns ? toGroup : -1);
}
else if (isColumnRange())
{
@@ -440,7 +454,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
// to account for the grouping of columns.
// Since that doesn't work for maps/sets/lists, we now use the compositesToGroup option of SliceQueryFilter.
// But we must preserve backward compatibility too (for mixed version cluster that is).
- int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount();
List<ByteBuffer> startBounds = getRequestedBound(Bound.START, variables);
List<ByteBuffer> endBounds = getRequestedBound(Bound.END, variables);
assert startBounds.size() == endBounds.size();
@@ -448,21 +461,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
// Handles fetching static columns. Note that for 2i, the filter is just used to restrict
// the part of the index to query so adding the static slice would be useless and confusing.
// For 2i, static columns are retrieve in CompositesSearcher with each index hit.
- ColumnSlice staticSlice = null;
- if (selectsStaticColumns && !usesSecondaryIndexing)
- {
- ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder();
- // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the
- // same effect while saving a few CPU cycles.
- staticSlice = isReversed
- ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER)
- : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange());
-
- // In the case where we only select static columns, we want to really only check the static columns.
- // So we return early as the rest of that method would actually make us query everything
- if (selectsOnlyStaticColumns)
- return sliceFilter(staticSlice, limit, toGroup);
- }
+ ColumnSlice staticSlice = selectsStaticColumns && !usesSecondaryIndexing
+ ? makeStaticSlice()
+ : null;
// The case where startBounds == 1 is common enough that it's worth optimizing
if (startBounds.size() == 1)
@@ -1088,7 +1089,7 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
? ((CompositeType)cfDef.cfm.getKeyValidator()).split(key)
: new ByteBuffer[]{ key };
- if (parameters.isDistinct)
+ if (parameters.isDistinct && !selectsStaticColumns)
{
if (!cf.hasOnlyTombstones(now))
{
@@ -1331,6 +1332,23 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
return false;
}
+ private void validateDistinctSelection()
+ throws InvalidRequestException
+ {
+ Collection<CFDefinition.Name> requestedColumns = selection.getColumns();
+ for (CFDefinition.Name name : requestedColumns)
+ if (name.kind != CFDefinition.Name.Kind.KEY_ALIAS && name.kind != CFDefinition.Name.Kind.STATIC)
+ throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)", name));
+
+ // If it's a key range, we require that all partition key columns are selected so we don't have to bother with post-query grouping.
+ if (!isKeyRange)
+ return;
+
+ for (CFDefinition.Name name : cfDef.partitionKeys())
+ if (!requestedColumns.contains(name))
+ throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name));
+ }
+
public static class RawStatement extends CFStatement
{
private final Parameters parameters;
@@ -1363,9 +1381,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
? Selection.wildcard(cfDef)
: Selection.fromSelectors(cfDef, selectClause);
- if (parameters.isDistinct)
- validateDistinctSelection(selection.getColumns(), cfDef.partitionKeys());
-
SelectStatement stmt = new SelectStatement(cfDef, boundNames.size(), parameters, selection, prepareLimit(boundNames));
/*
@@ -1440,6 +1455,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
checkNeedsFiltering(stmt);
+ if (parameters.isDistinct)
+ stmt.validateDistinctSelection();
+
return new ParsedStatement.Prepared(stmt, boundNames);
}
@@ -1961,18 +1979,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache
}
}
- private void validateDistinctSelection(Collection<CFDefinition.Name> requestedColumns, Collection<CFDefinition.Name> partitionKey)
- throws InvalidRequestException
- {
- for (CFDefinition.Name name : requestedColumns)
- if (!partitionKey.contains(name))
- throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns (not %s)", name));
-
- for (CFDefinition.Name name : partitionKey)
- if (!requestedColumns.contains(name))
- throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name));
- }
-
private boolean containsAlias(final ColumnIdentifier name)
{
return Iterables.any(selectClause, new Predicate<RawSelector>()