You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ty...@apache.org on 2016/09/27 16:54:11 UTC

[04/10] cassandra git commit: Treat IN values as a set instead of a list

Treat IN values as a set instead of a list

Patch by Tyler Hobbs; reviewed by Benjamin Lerer for CASSANDRA-12420


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/cdd535fc
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/cdd535fc
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/cdd535fc

Branch: refs/heads/trunk
Commit: cdd535fcac4ba79bb371e8373c6504d9e3978853
Parents: 6fb89b9
Author: Tyler Hobbs <ty...@gmail.com>
Authored: Tue Sep 27 11:51:41 2016 -0500
Committer: Tyler Hobbs <ty...@gmail.com>
Committed: Tue Sep 27 11:51:41 2016 -0500

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 ++
 NEWS.txt                                        |  7 +++
 .../cql3/statements/SelectStatement.java        | 45 +++++++++-----------
 .../entities/FrozenCollectionsTest.java         |  8 ++--
 .../validation/operations/SelectLimitTest.java  |  2 +-
 .../SelectMultiColumnRelationTest.java          |  6 +--
 .../operations/SelectOrderByTest.java           |  8 ++--
 7 files changed, 41 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1438e98..b778444 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,7 @@
 2.1.16
+ * Avoid infinitely looping result set when paging SELECT queries with
+   an IN clause with duplicate keys by treating the IN values as a set instead
+   of a list (CASSANDRA-12420)
  * Add system property to set the max number of native transport requests in queue (CASSANDRA-11363)
  * Include column family parameter when -st and -et are provided (CASSANDRA-11866)
  * Fix queries with empty ByteBuffer values in clustering column restrictions (CASSANDRA-12127) 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 6a70adc..2db34ed 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -18,6 +18,13 @@ using the provided 'sstableupgrade' tool.
 
 Upgrading
 ---------
+    - Duplicate partition keys in SELECT statement IN clauses will now be
+      filtered out, meaning that duplicate results will no longer be returned.
+      Futhermore, the partitions will be returned in the order of the sorted
+      partition keys instead of the order of the IN values; this matches the
+      behavior of Cassandra 2.2+.  This was necessary to avoid an infinitely
+      looping result set when combined with paging under some circumstances.
+      See CASSANDRA-12420 for details.
     - The ReversedType behaviour has been corrected for clustering columns of
       BYTES type containing empty value. Scrub should be run on the existing
       SSTables containing a descending clustering column of BYTES type to correct

http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 40f3f33..fe63b44 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -71,13 +71,6 @@ public class SelectStatement implements CQLStatement
 
     private static final int DEFAULT_COUNT_PAGE_SIZE = 10000;
 
-    /**
-     * In the current version a query containing duplicate values in an IN restriction on the partition key will
-     * cause the same record to be returned multiple time. This behavior will be changed in 3.0 but until then
-     * we will log a warning the first time this problem occurs.
-     */
-    private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES;
-
     private final int boundTerms;
     public final CFMetaData cfm;
     public final Parameters parameters;
@@ -682,9 +675,9 @@ public class SelectStatement implements CQLStatement
              : limit;
     }
 
-    private Collection<ByteBuffer> getKeys(final QueryOptions options) throws InvalidRequestException
+    private NavigableSet<ByteBuffer> getKeys(final QueryOptions options) throws InvalidRequestException
     {
-        List<ByteBuffer> keys = new ArrayList<ByteBuffer>();
+        TreeSet<ByteBuffer> sortedKeys = new TreeSet<>(cfm.getKeyValidator());
         CBuilder builder = cfm.getKeyValidatorAsCType().builder();
         for (ColumnDefinition def : cfm.partitionKeyColumns())
         {
@@ -695,18 +688,14 @@ public class SelectStatement implements CQLStatement
 
             if (builder.remainingCount() == 1)
             {
-                if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES  && containsDuplicates(values))
-                {
-                    // This approach does not fully prevent race conditions but it is not a big deal.
-                    HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true;
-                    logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows.");
-                }
-
                 for (ByteBuffer val : values)
                 {
                     if (val == null)
                         throw new InvalidRequestException(String.format("Invalid null value for partition key part %s", def.name));
-                    keys.add(builder.buildWith(val).toByteBuffer());
+
+                    ByteBuffer keyBuffer = builder.buildWith(val).toByteBuffer();
+                    validateKey(keyBuffer);
+                    sortedKeys.add(keyBuffer);
                 }
             }
             else
@@ -720,18 +709,22 @@ public class SelectStatement implements CQLStatement
                 builder.add(val);
             }
         }
-        return keys;
+        return sortedKeys;
     }
 
-    /**
-     * Checks if the specified list contains duplicate values.
-     *
-     * @param values the values to check
-     * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise.
-     */
-    private static boolean containsDuplicates(List<ByteBuffer> values)
+    private void validateKey(ByteBuffer keyBuffer) throws InvalidRequestException
     {
-        return new HashSet<>(values).size() < values.size();
+        if (keyBuffer == null || keyBuffer.remaining() == 0)
+            throw new InvalidRequestException("Key may not be empty");
+
+        try
+        {
+            cfm.getKeyValidator().validate(keyBuffer);
+        }
+        catch (MarshalException exc)
+        {
+            throw new InvalidRequestException("Partition key IN clause contained invalid value: " + exc);
+        }
     }
 
     private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException

http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
index beed560..fb50e83 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java
@@ -85,8 +85,8 @@ public class FrozenCollectionsTest extends CQLTester
         );
 
         assertRows(execute("SELECT * FROM %s WHERE k IN ?", list(set(4, 5, 6), set())),
-                row(set(4, 5, 6), 0),
-                row(set(), 0)
+                row(set(), 0),
+                row(set(4, 5, 6), 0)
         );
 
         assertRows(execute("SELECT * FROM %s WHERE token(k) >= token(?)", set(4, 5, 6)),
@@ -154,9 +154,9 @@ public class FrozenCollectionsTest extends CQLTester
         );
 
         assertRows(execute("SELECT * FROM %s WHERE k IN ?", list(map(set(4, 5, 6), list(1, 2, 3)), map(), map(set(), list(1, 2, 3)))),
-            row(map(set(4, 5, 6), list(1, 2, 3)), 0),
             row(map(), 0),
-            row(map(set(), list(1, 2, 3)), 0)
+            row(map(set(), list(1, 2, 3)), 0),
+            row(map(set(4, 5, 6), list(1, 2, 3)), 0)
         );
 
         assertRows(execute("SELECT * FROM %s WHERE token(k) >= token(?)", map(set(4, 5, 6), list(1, 2, 3))),

http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
index eb2be60..5cd24dd 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java
@@ -72,7 +72,7 @@ public class SelectLimitTest extends CQLTester
         // Check that we do limit the output to 1 *and* that we respect query
         // order of keys (even though 48 is after 2)
         assertRows(execute("SELECT * FROM %s WHERE userid IN (48, 2) LIMIT 1"),
-                   row(48, "http://foo.com", 42L));
+                   row(2, "http://foo.com", 42L));
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
index 98dda26..5f82328 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java
@@ -629,10 +629,10 @@ public class SelectMultiColumnRelationTest extends CQLTester
 
             // same query, but reversed order for the IN values
             assertRows(execute("SELECT * FROM %s WHERE a IN (?, ?) AND (b, c, d) IN (?, ?)", 1, 0, tuple(0, 1, 1), tuple(0, 1, 0)),
-                    row(1, 0, 1, 0),
-                    row(1, 0, 1, 1),
                     row(0, 0, 1, 0),
-                    row(0, 0, 1, 1)
+                    row(0, 0, 1, 1),
+                    row(1, 0, 1, 0),
+                    row(1, 0, 1, 1)
             );
 
             assertRows(execute("SELECT * FROM %s WHERE a IN (?, ?) and (b, c) IN ((?, ?))", 0, 1, 0, 1),

http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
index cf923bc..2cc0d6c 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java
@@ -517,12 +517,12 @@ public class SelectOrderByTest extends CQLTester
                    row(2),
                    row(0));
         assertRows(execute("SELECT v FROM %s WHERE k IN (1, 0)"),
-                   row(3),
-                   row(4),
-                   row(5),
                    row(0),
                    row(1),
-                   row(2));
+                   row(2),
+                   row(3),
+                   row(4),
+                   row(5));
 
         assertRows(execute("SELECT v FROM %s WHERE k IN (1, 0) ORDER BY c1 ASC"),
                    row(0),