You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ty...@apache.org on 2016/09/27 16:54:09 UTC
[02/10] cassandra git commit: Treat IN values as a set instead of a
list
Treat IN values as a set instead of a list
Patch by Tyler Hobbs; reviewed by Benjamin Lerer for CASSANDRA-12420
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/cdd535fc
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/cdd535fc
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/cdd535fc
Branch: refs/heads/cassandra-2.2
Commit: cdd535fcac4ba79bb371e8373c6504d9e3978853
Parents: 6fb89b9
Author: Tyler Hobbs <ty...@gmail.com>
Authored: Tue Sep 27 11:51:41 2016 -0500
Committer: Tyler Hobbs <ty...@gmail.com>
Committed: Tue Sep 27 11:51:41 2016 -0500
----------------------------------------------------------------------
CHANGES.txt | 3 ++
NEWS.txt | 7 +++
.../cql3/statements/SelectStatement.java | 45 +++++++++-----------
.../entities/FrozenCollectionsTest.java | 8 ++--
.../validation/operations/SelectLimitTest.java | 2 +-
.../SelectMultiColumnRelationTest.java | 6 +--
.../operations/SelectOrderByTest.java | 8 ++--
7 files changed, 41 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1438e98..b778444 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,7 @@
2.1.16
+ * Avoid infinitely looping result set when paging SELECT queries with
+ an IN clause with duplicate keys by treating the IN values as a set instead
+ of a list (CASSANDRA-12420)
* Add system property to set the max number of native transport requests in queue (CASSANDRA-11363)
* Include column family parameter when -st and -et are provided (CASSANDRA-11866)
* Fix queries with empty ByteBuffer values in clustering column restrictions (CASSANDRA-12127)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 6a70adc..2db34ed 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -18,6 +18,13 @@ using the provided 'sstableupgrade' tool.
Upgrading
---------
+ - Duplicate partition keys in SELECT statement IN clauses will now be
+ filtered out, meaning that duplicate results will no longer be returned.
+ Futhermore, the partitions will be returned in the order of the sorted
+ partition keys instead of the order of the IN values; this matches the
+ behavior of Cassandra 2.2+. This was necessary to avoid an infinitely
+ looping result set when combined with paging under some circumstances.
+ See CASSANDRA-12420 for details.
- The ReversedType behaviour has been corrected for clustering columns of
BYTES type containing empty value. Scrub should be run on the existing
SSTables containing a descending clustering column of BYTES type to correct
http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 40f3f33..fe63b44 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -71,13 +71,6 @@ public class SelectStatement implements CQLStatement
private static final int DEFAULT_COUNT_PAGE_SIZE = 10000;
- /**
- * In the current version a query containing duplicate values in an IN restriction on the partition key will
- * cause the same record to be returned multiple time. This behavior will be changed in 3.0 but until then
- * we will log a warning the first time this problem occurs.
- */
- private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES;
-
private final int boundTerms;
public final CFMetaData cfm;
public final Parameters parameters;
@@ -682,9 +675,9 @@ public class SelectStatement implements CQLStatement
: limit;
}
- private Collection<ByteBuffer> getKeys(final QueryOptions options) throws InvalidRequestException
+ private NavigableSet<ByteBuffer> getKeys(final QueryOptions options) throws InvalidRequestException
{
- List<ByteBuffer> keys = new ArrayList<ByteBuffer>();
+ TreeSet<ByteBuffer> sortedKeys = new TreeSet<>(cfm.getKeyValidator());
CBuilder builder = cfm.getKeyValidatorAsCType().builder();
for (ColumnDefinition def : cfm.partitionKeyColumns())
{
@@ -695,18 +688,14 @@ public class SelectStatement implements CQLStatement
if (builder.remainingCount() == 1)
{
- if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES && containsDuplicates(values))
- {
- // This approach does not fully prevent race conditions but it is not a big deal.
- HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true;
- logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows.");
- }
-
for (ByteBuffer val : values)
{
if (val == null)
throw new InvalidRequestException(String.format("Invalid null value for partition key part %s", def.name));
- keys.add(builder.buildWith(val).toByteBuffer());
+
+ ByteBuffer keyBuffer = builder.buildWith(val).toByteBuffer();
+ validateKey(keyBuffer);
+ sortedKeys.add(keyBuffer);
}
}
else
@@ -720,18 +709,22 @@ public class SelectStatement implements CQLStatement
builder.add(val);
}
}
- return keys;
+ return sortedKeys;
}
- /**
- * Checks if the specified list contains duplicate values.
- *
- * @param values the values to check
- * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise.
- */
- private static boolean containsDuplicates(List<ByteBuffer> values)
+ private void validateKey(ByteBuffer keyBuffer) throws InvalidRequestException
{
- return new HashSet<>(values).size() < values.size();
+ if (keyBuffer == null || keyBuffer.remaining() == 0)
+ throw new InvalidRequestException("Key may not be empty");
+
+ try
+ {
+ cfm.getKeyValidator().validate(keyBuffer);
+ }
+ catch (MarshalException exc)
+ {
+ throw new InvalidRequestException("Partition key IN clause contained invalid value: " + exc);
+ }
}
private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException
http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
index beed560..fb50e83 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
@@ -85,8 +85,8 @@ public class FrozenCollectionsTest extends CQLTester
);
assertRows(execute("SELECT * FROM %s WHERE k IN ?", list(set(4, 5, 6), set())),
- row(set(4, 5, 6), 0),
- row(set(), 0)
+ row(set(), 0),
+ row(set(4, 5, 6), 0)
);
assertRows(execute("SELECT * FROM %s WHERE token(k) >= token(?)", set(4, 5, 6)),
@@ -154,9 +154,9 @@ public class FrozenCollectionsTest extends CQLTester
);
assertRows(execute("SELECT * FROM %s WHERE k IN ?", list(map(set(4, 5, 6), list(1, 2, 3)), map(), map(set(), list(1, 2, 3)))),
- row(map(set(4, 5, 6), list(1, 2, 3)), 0),
row(map(), 0),
- row(map(set(), list(1, 2, 3)), 0)
+ row(map(set(), list(1, 2, 3)), 0),
+ row(map(set(4, 5, 6), list(1, 2, 3)), 0)
);
assertRows(execute("SELECT * FROM %s WHERE token(k) >= token(?)", map(set(4, 5, 6), list(1, 2, 3))),
http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
index eb2be60..5cd24dd 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
@@ -72,7 +72,7 @@ public class SelectLimitTest extends CQLTester
// Check that we do limit the output to 1 *and* that we respect query
// order of keys (even though 48 is after 2)
assertRows(execute("SELECT * FROM %s WHERE userid IN (48, 2) LIMIT 1"),
- row(48, "http://foo.com", 42L));
+ row(2, "http://foo.com", 42L));
}
/**
http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
index 98dda26..5f82328 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
@@ -629,10 +629,10 @@ public class SelectMultiColumnRelationTest extends CQLTester
// same query, but reversed order for the IN values
assertRows(execute("SELECT * FROM %s WHERE a IN (?, ?) AND (b, c, d) IN (?, ?)", 1, 0, tuple(0, 1, 1), tuple(0, 1, 0)),
- row(1, 0, 1, 0),
- row(1, 0, 1, 1),
row(0, 0, 1, 0),
- row(0, 0, 1, 1)
+ row(0, 0, 1, 1),
+ row(1, 0, 1, 0),
+ row(1, 0, 1, 1)
);
assertRows(execute("SELECT * FROM %s WHERE a IN (?, ?) and (b, c) IN ((?, ?))", 0, 1, 0, 1),
http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
index cf923bc..2cc0d6c 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
@@ -517,12 +517,12 @@ public class SelectOrderByTest extends CQLTester
row(2),
row(0));
assertRows(execute("SELECT v FROM %s WHERE k IN (1, 0)"),
- row(3),
- row(4),
- row(5),
row(0),
row(1),
- row(2));
+ row(2),
+ row(3),
+ row(4),
+ row(5));
assertRows(execute("SELECT v FROM %s WHERE k IN (1, 0) ORDER BY c1 ASC"),
row(0),