You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sn...@apache.org on 2015/01/21 20:25:24 UTC

[1/3] cassandra git commit: Duplicate rows returned when in clause has repeated values

Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 faf91818b -> 0c2eaa9cb
  refs/heads/trunk 0a09b87dc -> 732986bbd


Duplicate rows returned when in clause has repeated values

Patch by Benjamin Lerer, reviewed by Robert Stupp for CASSANDRA-6706


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/0c2eaa9c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/0c2eaa9c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/0c2eaa9c

Branch: refs/heads/cassandra-2.1
Commit: 0c2eaa9cbb51f064b439c4d098adb5aa76b65b0f
Parents: faf9181
Author: Benjamin Lerer <b_...@hotmail.com>
Authored: Wed Jan 21 20:12:55 2015 +0100
Committer: Robert Stupp <sn...@snazy.de>
Committed: Wed Jan 21 20:12:55 2015 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cql3/statements/SelectStatement.java        | 25 ++++++++++++++++++++
 2 files changed, 26 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f1eaa77..9cd8189 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.3
+ * Duplicate rows returned when in clause has repeated values (CASSANDRA-6707)
  * Add tooling to detect hot partitions (CASSANDRA-7974)
  * Fix cassandra-stress user-mode truncation of partition generation (CASSANDRA-8608)
  * Only stream from unrepaired sstables during inc repair (CASSANDRA-8267)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 30259dd..633d43c 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -65,6 +65,13 @@ public class SelectStatement implements CQLStatement
 
     private static final int DEFAULT_COUNT_PAGE_SIZE = 10000;
 
+    /**
+     * In the current version a query containing duplicate values in an IN restriction on the partition key will
+     * cause the same record to be returned multiple time. This behavior will be changed in 3.0 but until then
+     * we will log a warning the first time this problem occurs.
+     */
+    private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES;
+
     private final int boundTerms;
     public final CFMetaData cfm;
     public final Parameters parameters;
@@ -588,6 +595,13 @@ public class SelectStatement implements CQLStatement
 
             if (builder.remainingCount() == 1)
             {
+                if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES  && containsDuplicates(values))
+                {
+                    // This approach does not fully prevent race conditions but it is not a big deal.
+                    HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true;
+                    logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows.");
+                }
+
                 for (ByteBuffer val : values)
                 {
                     if (val == null)
@@ -609,6 +623,17 @@ public class SelectStatement implements CQLStatement
         return keys;
     }
 
+    /**
+     * Checks if the specified list contains duplicate values.
+     *
+     * @param values the values to check
+     * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise.
+     */
+    private static boolean containsDuplicates(List<ByteBuffer> values)
+    {
+        return new HashSet<>(values).size() < values.size();
+    }
+
     private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException
     {
         // Deal with unrestricted partition key components (special-casing is required to deal with 2i queries on the first


[3/3] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

Posted by sn...@apache.org.
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/732986bb
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/732986bb
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/732986bb

Branch: refs/heads/trunk
Commit: 732986bbd02a5acafaf97de24ea555c8d426b873
Parents: 0a09b87 0c2eaa9
Author: Benjamin Lerer <b_...@hotmail.com>
Authored: Wed Jan 21 20:19:42 2015 +0100
Committer: Robert Stupp <sn...@snazy.de>
Committed: Wed Jan 21 20:19:42 2015 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 ++-
 .../cassandra/cql3/MultiColumnRelationTest.java | 15 +++++++++++++
 .../cql3/SingleColumnRelationTest.java          | 22 ++++++++++++++++++--
 3 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/732986bb/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 9343d66,9cd8189..cc6478e
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,58 -1,5 +1,59 @@@
 -2.1.3
 +3.0
+  * Duplicate rows returned when in clause has repeated values (CASSANDRA-6707)
 + * Make CassandraException unchecked, extend RuntimeException (CASSANDRA-8560)
 + * Support direct buffer decompression for reads (CASSANDRA-8464)
 + * DirectByteBuffer compatible LZ4 methods (CASSANDRA-7039)
 + * Add role based access control (CASSANDRA-7653)
 + * Group sstables for anticompaction correctly (CASSANDRA-8578)
 + * Add ReadFailureException to native protocol, respond
 +   immediately when replicas encounter errors while handling
 +   a read request (CASSANDRA-7886)
 + * Switch CommitLogSegment from RandomAccessFile to nio (CASSANDRA-8308)
 + * Allow mixing token and partition key restrictions (CASSANDRA-7016)
 + * Support index key/value entries on map collections (CASSANDRA-8473)
 + * Modernize schema tables (CASSANDRA-8261)
 + * Support for user-defined aggregation functions (CASSANDRA-8053)
 + * Fix NPE in SelectStatement with empty IN values (CASSANDRA-8419)
 + * Refactor SelectStatement, return IN results in natural order instead
-    of IN value list order (CASSANDRA-7981)
++   of IN value list order and ignore duplicate values in partition key IN restrictions (CASSANDRA-7981)
 + * Support UDTs, tuples, and collections in user-defined
 +   functions (CASSANDRA-7563)
 + * Fix aggregate fn results on empty selection, result column name,
 +   and cqlsh parsing (CASSANDRA-8229)
 + * Mark sstables as repaired after full repair (CASSANDRA-7586)
 + * Extend Descriptor to include a format value and refactor reader/writer
 +   APIs (CASSANDRA-7443)
 + * Integrate JMH for microbenchmarks (CASSANDRA-8151)
 + * Keep sstable levels when bootstrapping (CASSANDRA-7460)
 + * Add Sigar library and perform basic OS settings check on startup (CASSANDRA-7838)
 + * Support for aggregation functions (CASSANDRA-4914)
 + * Remove cassandra-cli (CASSANDRA-7920)
 + * Accept dollar quoted strings in CQL (CASSANDRA-7769)
 + * Make assassinate a first class command (CASSANDRA-7935)
 + * Support IN clause on any clustering column (CASSANDRA-4762)
 + * Improve compaction logging (CASSANDRA-7818)
 + * Remove YamlFileNetworkTopologySnitch (CASSANDRA-7917)
 + * Do anticompaction in groups (CASSANDRA-6851)
 + * Support user-defined functions (CASSANDRA-7395, 7526, 7562, 7740, 7781, 7929,
 +   7924, 7812, 8063, 7813, 7708)
 + * Permit configurable timestamps with cassandra-stress (CASSANDRA-7416)
 + * Move sstable RandomAccessReader to nio2, which allows using the
 +   FILE_SHARE_DELETE flag on Windows (CASSANDRA-4050)
 + * Remove CQL2 (CASSANDRA-5918)
 + * Add Thrift get_multi_slice call (CASSANDRA-6757)
 + * Optimize fetching multiple cells by name (CASSANDRA-6933)
 + * Allow compilation in java 8 (CASSANDRA-7028)
 + * Make incremental repair default (CASSANDRA-7250)
 + * Enable code coverage thru JaCoCo (CASSANDRA-7226)
 + * Switch external naming of 'column families' to 'tables' (CASSANDRA-4369) 
 + * Shorten SSTable path (CASSANDRA-6962)
 + * Use unsafe mutations for most unit tests (CASSANDRA-6969)
 + * Fix race condition during calculation of pending ranges (CASSANDRA-7390)
 + * Fail on very large batch sizes (CASSANDRA-8011)
 + * Improve concurrency of repair (CASSANDRA-6455, 8208)
 +
 +
 +2.1.3
   * Add tooling to detect hot partitions (CASSANDRA-7974)
   * Fix cassandra-stress user-mode truncation of partition generation (CASSANDRA-8608)
   * Only stream from unrepaired sstables during inc repair (CASSANDRA-8267)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/732986bb/test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
index b178498,4c3ba2a..3e6e45b
--- a/test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
+++ b/test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
@@@ -583,81 -555,4 +583,96 @@@ public class MultiColumnRelationTest ex
              assertEmpty(execute("SELECT * FROM %s WHERE a = ? AND (b, c) > (?, ?)", 0, 1, 0));
          }
      }
 +
 +    @Test
 +    public void testMultipleClusteringWithIndex() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (a int, b int, c int, d int, e int, PRIMARY KEY (a, b, c, d))");
 +        createIndex("CREATE INDEX ON %s (b)");
 +        createIndex("CREATE INDEX ON %s (e)");
 +
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 0, 0);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 0, 1);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 1, 2);
 +
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 1, 0, 0, 0);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 1, 1, 0, 1);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 1, 1, 1, 2);
 +
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 2, 0, 0, 0);
 +
 +        assertRows(execute("SELECT * FROM %s WHERE (b) = (?)", 1),
 +                   row(0, 1, 0, 0, 0),
 +                   row(0, 1, 1, 0, 1),
 +                   row(0, 1, 1, 1, 2));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE (b, c) = (?, ?) ALLOW FILTERING", 1, 1),
 +                   row(0, 1, 1, 0, 1),
 +                   row(0, 1, 1, 1, 2));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE (b, c) = (?, ?) AND e = ? ALLOW FILTERING", 1, 1, 2),
 +                   row(0, 1, 1, 1, 2));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE (b) IN ((?)) AND e = ? ALLOW FILTERING", 1, 2),
 +                   row(0, 1, 1, 1, 2));
 +
 +        assertInvalidMessage("IN restrictions are not supported on indexed columns",
 +                             "SELECT * FROM %s WHERE (b) IN ((?), (?)) AND e = ? ALLOW FILTERING", 0, 1, 2);
 +
 +        assertInvalidMessage("IN restrictions are not supported on indexed columns",
 +                             "SELECT * FROM %s WHERE (b, c) IN ((?, ?)) AND e = ? ALLOW FILTERING", 0, 1, 2);
 +
 +        assertInvalidMessage("IN restrictions are not supported on indexed columns",
 +                             "SELECT * FROM %s WHERE (b, c) IN ((?, ?), (?, ?)) AND e = ? ALLOW FILTERING", 0, 1, 1, 1, 2);
 +
 +        assertInvalidMessage("Slice restrictions are not supported on indexed columns which are part of a multi column relation",
 +                             "SELECT * FROM %s WHERE (b) >= (?) AND e = ? ALLOW FILTERING", 1, 2);
 +    }
 +
 +    @Test
 +    public void testMultiplePartitionKeyAndMultiClusteringWithIndex() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (a int, b int, c int, d int, e int, PRIMARY KEY ((a, b), c, d, e))");
 +        createIndex("CREATE INDEX ON %s (c)");
 +
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 0, 0);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 1, 0);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 1, 1);
 +
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 0, 0);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 1, 0);
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 1, 1);
 +
 +        execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 2, 0, 0);
 +
 +        assertRows(execute("SELECT * FROM %s WHERE a = ? AND (c) = (?) ALLOW FILTERING", 0, 1),
 +                   row(0, 0, 1, 0, 0),
 +                   row(0, 0, 1, 1, 0),
 +                   row(0, 0, 1, 1, 1));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE a = ? AND (c, d) = (?, ?) ALLOW FILTERING", 0, 1, 1),
 +                   row(0, 0, 1, 1, 0),
 +                   row(0, 0, 1, 1, 1));
 +
 +        assertInvalidMessage("Partition key parts: b must be restricted as other parts are",
 +                             "SELECT * FROM %s WHERE a = ? AND (c, d) IN ((?, ?)) ALLOW FILTERING", 0, 1, 1);
 +
 +        assertInvalidMessage("Partition key parts: b must be restricted as other parts are",
 +                             "SELECT * FROM %s WHERE a = ? AND (c, d) >= (?, ?) ALLOW FILTERING", 0, 1, 1);
 +    }
++
++    @Test
++    public void testINWithDuplicateValue() throws Throwable
++    {
++        for (String compactOption : new String[] { "", " WITH COMPACT STORAGE" })
++        {
++            createTable("CREATE TABLE %s (k1 int, k2 int, v int, PRIMARY KEY (k1, k2))" + compactOption);
++            execute("INSERT INTO %s (k1,  k2, v) VALUES (?, ?, ?)", 1, 1, 1);
++
++            assertRows(execute("SELECT * FROM %s WHERE k1 IN (?, ?) AND (k2) IN ((?), (?))", 1, 1, 1, 2),
++                       row(1, 1, 1));
++            assertRows(execute("SELECT * FROM %s WHERE k1 = ? AND (k2) IN ((?), (?))", 1, 1, 1),
++                       row(1, 1, 1));
++        }
++    }
  }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/732986bb/test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
index 0fd300b,2ad4bda..f505a04
--- a/test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
+++ b/test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
@@@ -50,340 -45,14 +50,359 @@@ public class SingleColumnRelationTest e
          execute("INSERT INTO %s (a, b, c) VALUES (0, {0}, 0)");
  
          // non-EQ operators
 -        assertInvalid("SELECT * FROM %s WHERE c = 0 AND b > ?", set(0));
 -        assertInvalid("SELECT * FROM %s WHERE c = 0 AND b >= ?", set(0));
 -        assertInvalid("SELECT * FROM %s WHERE c = 0 AND b < ?", set(0));
 -        assertInvalid("SELECT * FROM %s WHERE c = 0 AND b <= ?", set(0));
 -        assertInvalid("SELECT * FROM %s WHERE c = 0 AND b IN (?)", set(0));
 +        assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '>' relation",
 +                             "SELECT * FROM %s WHERE c = 0 AND b > ?", set(0));
 +        assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '>=' relation",
 +                             "SELECT * FROM %s WHERE c = 0 AND b >= ?", set(0));
 +        assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '<' relation",
 +                             "SELECT * FROM %s WHERE c = 0 AND b < ?", set(0));
 +        assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '<=' relation",
 +                             "SELECT * FROM %s WHERE c = 0 AND b <= ?", set(0));
 +        assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a 'IN' relation",
 +                             "SELECT * FROM %s WHERE c = 0 AND b IN (?)", set(0));
 +    }
 +
 +    @Test
 +    public void testClusteringColumnRelations() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key(a, b, c))");
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 5, 1);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 6, 2);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 7, 3);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 8, 4);
 +
 +        testSelectQueriesWithClusteringColumnRelations();
 +    }
 +
 +    @Test
 +    public void testClusteringColumnRelationsWithCompactStorage() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key(a, b, c)) WITH COMPACT STORAGE;");
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 5, 1);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 6, 2);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 7, 3);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 8, 4);
 +
 +        testSelectQueriesWithClusteringColumnRelations();
 +    }
 +
 +    private void testSelectQueriesWithClusteringColumnRelations() throws Throwable
 +    {
 +        assertRows(execute("select * from %s where a in (?, ?)", "first", "second"),
 +                   row("first", 1, 5, 1),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3),
 +                   row("second", 4, 8, 4));
 +
 +        assertRows(execute("select * from %s where a = ? and b = ? and c in (?, ?)", "first", 2, 6, 7),
 +                   row("first", 2, 6, 2));
 +
 +        assertRows(execute("select * from %s where a = ? and b in (?, ?) and c in (?, ?)", "first", 2, 3, 6, 7),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertRows(execute("select * from %s where a = ? and b in (?, ?) and c in (?, ?)", "first", 3, 2, 7, 6),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?)", "first", 7, 6, 3, 2),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertRows(execute("select c, d from %s where a = ? and c in (?, ?) and b in (?, ?)", "first", 7, 6, 3, 2),
 +                   row(6, 2),
 +                   row(7, 3));
 +
 +        assertRows(execute("select c, d from %s where a = ? and c in (?, ?) and b in (?, ?, ?)", "first", 7, 6, 3, 2, 3),
 +                   row(6, 2),
 +                   row(7, 3));
 +
 +        assertRows(execute("select * from %s where a = ? and b in (?, ?) and c = ?", "first", 3, 2, 7),
 +                   row("first", 3, 7, 3));
 +
 +        assertRows(execute("select * from %s where a = ? and b in ? and c in ?",
 +                           "first", Arrays.asList(3, 2), Arrays.asList(7, 6)),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertInvalidMessage("Invalid null value for IN restriction",
 +                             "select * from %s where a = ? and b in ? and c in ?", "first", null, Arrays.asList(7, 6));
 +
 +        assertRows(execute("select * from %s where a = ? and c >= ? and b in (?, ?)", "first", 6, 3, 2),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertRows(execute("select * from %s where a = ? and c > ? and b in (?, ?)", "first", 6, 3, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertRows(execute("select * from %s where a = ? and c <= ? and b in (?, ?)", "first", 6, 3, 2),
 +                   row("first", 2, 6, 2));
 +
 +        assertRows(execute("select * from %s where a = ? and c < ? and b in (?, ?)", "first", 7, 3, 2),
 +                   row("first", 2, 6, 2));
 +//---
 +        assertRows(execute("select * from %s where a = ? and c >= ? and c <= ? and b in (?, ?)", "first", 6, 7, 3, 2),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertRows(execute("select * from %s where a = ? and c > ? and c <= ? and b in (?, ?)", "first", 6, 7, 3, 2),
 +                   row("first", 3, 7, 3));
 +
 +        assertEmpty(execute("select * from %s where a = ? and c > ? and c < ? and b in (?, ?)", "first", 6, 7, 3, 2));
 +
 +        assertInvalidMessage("Column \"c\" cannot be restricted by both an equality and an inequality relation",
 +                             "select * from %s where a = ? and c > ? and c = ? and b in (?, ?)", "first", 6, 7, 3, 2);
 +
 +        assertInvalidMessage("c cannot be restricted by more than one relation if it includes an Equal",
 +                             "select * from %s where a = ? and c = ? and c > ?  and b in (?, ?)", "first", 6, 7, 3, 2);
 +
 +        assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?) order by b DESC",
 +                           "first", 7, 6, 3, 2),
 +                   row("first", 3, 7, 3),
 +                   row("first", 2, 6, 2));
 +
 +        assertInvalidMessage("More than one restriction was found for the start bound on b",
 +                             "select * from %s where a = ? and b > ? and b > ?", "first", 6, 3, 2);
 +
 +        assertInvalidMessage("More than one restriction was found for the end bound on b",
 +                             "select * from %s where a = ? and b < ? and b <= ?", "first", 6, 3, 2);
 +    }
 +
 +    @Test
 +    public void testPartitionKeyColumnRelations() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key((a, b), c))");
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 1, 1);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 2, 2);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 3, 3);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 4, 4);
 +
 +        assertInvalidMessage("Partition KEY part a cannot be restricted by IN relation (only the last part of the partition key can)",
 +                             "select * from %s where a in (?, ?)", "first", "second");
 +        assertInvalidMessage("Partition KEY part a cannot be restricted by IN relation (only the last part of the partition key can)",
 +                             "select * from %s where a in (?, ?) and b in (?, ?)", "first", "second", 2, 3);
 +        assertInvalidMessage("Partition key parts: b must be restricted as other parts are",
 +                             "select * from %s where a = ?", "first");
 +        assertInvalidMessage("b cannot be restricted by more than one relation if it includes a IN",
 +                             "select * from %s where a = ? AND b IN (?, ?) AND b = ?", "first", 2, 2, 3);
 +        assertInvalidMessage("b cannot be restricted by more than one relation if it includes an Equal",
 +                             "select * from %s where a = ? AND b = ? AND b IN (?, ?)", "first", 2, 2, 3);
 +    }
 +
 +    @Test
 +    public void testClusteringColumnRelationsWithClusteringOrder() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key(a, b, c)) WITH CLUSTERING ORDER BY (b DESC);");
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 5, 1);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 6, 2);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 7, 3);
 +        execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 8, 4);
 +
 +        assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?) order by b DESC",
 +                           "first", 7, 6, 3, 2),
 +                   row("first", 3, 7, 3),
 +                   row("first", 2, 6, 2));
 +
 +        assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?) order by b ASC",
 +                           "first", 7, 6, 3, 2),
 +                   row("first", 2, 6, 2),
 +                   row("first", 3, 7, 3));
 +    }
 +
 +    @Test
 +    public void testAllowFilteringWithClusteringColumn() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))");
 +
 +        execute("INSERT INTO %s (k, c, v) VALUES(?, ?, ?)", 1, 2, 1);
 +        execute("INSERT INTO %s (k, c, v) VALUES(?, ?, ?)", 1, 3, 2);
 +        execute("INSERT INTO %s (k, c, v) VALUES(?, ?, ?)", 2, 2, 3);
 +
 +        // Don't require filtering, always allowed
 +        assertRows(execute("SELECT * FROM %s WHERE k = ?", 1),
 +                   row(1, 2, 1),
 +                   row(1, 3, 2));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE k = ? AND c > ?", 1, 2), row(1, 3, 2));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE k = ? AND c = ?", 1, 2), row(1, 2, 1));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE k = ? ALLOW FILTERING", 1),
 +                   row(1, 2, 1),
 +                   row(1, 3, 2));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE k = ? AND c > ? ALLOW FILTERING", 1, 2), row(1, 3, 2));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE k = ? AND c = ? ALLOW FILTERING", 1, 2), row(1, 2, 1));
 +
 +        // Require filtering, allowed only with ALLOW FILTERING
 +        assertInvalidMessage("Cannot execute this query as it might involve data filtering",
 +                             "SELECT * FROM %s WHERE c = ?", 2);
 +        assertInvalidMessage("Cannot execute this query as it might involve data filtering",
 +                             "SELECT * FROM %s WHERE c > ? AND c <= ?", 2, 4);
 +
 +        assertRows(execute("SELECT * FROM %s WHERE c = ? ALLOW FILTERING", 2),
 +                   row(1, 2, 1),
 +                   row(2, 2, 3));
 +
 +        assertRows(execute("SELECT * FROM %s WHERE c > ? AND c <= ? ALLOW FILTERING", 2, 4), row(1, 3, 2));
 +    }
 +
 +    @Test
 +    public void testAllowFilteringWithIndexedColumn() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (k int PRIMARY KEY, a int, b int)");
 +        createIndex("CREATE INDEX ON %s(a)");
 +
 +        execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 1, 10, 100);
 +        execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 2, 20, 200);
 +        execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 3, 30, 300);
 +        execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 4, 40, 400);
 +
 +        // Don't require filtering, always allowed
 +        assertRows(execute("SELECT * FROM %s WHERE k = ?", 1), row(1, 10, 100));
 +        assertRows(execute("SELECT * FROM %s WHERE a = ?", 20), row(2, 20, 200));
 +        assertRows(execute("SELECT * FROM %s WHERE k = ? ALLOW FILTERING", 1), row(1, 10, 100));
 +        assertRows(execute("SELECT * FROM %s WHERE a = ? ALLOW FILTERING", 20), row(2, 20, 200));
 +
 +        assertInvalid("SELECT * FROM %s WHERE a = ? AND b = ?");
 +        assertRows(execute("SELECT * FROM %s WHERE a = ? AND b = ? ALLOW FILTERING", 20, 200), row(2, 20, 200));
 +    }
 +
 +    @Test
 +    public void testIndexQueriesOnComplexPrimaryKey() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (pk0 int, pk1 int, ck0 int, ck1 int, ck2 int, value int, PRIMARY KEY ((pk0, pk1), ck0, ck1, ck2))");
 +
 +        createIndex("CREATE INDEX ON %s (ck1)");
 +        createIndex("CREATE INDEX ON %s (ck2)");
 +        createIndex("CREATE INDEX ON %s (pk0)");
 +        createIndex("CREATE INDEX ON %s (ck0)");
 +
 +        execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 0, 1, 2, 3, 4, 5);
 +        execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 1, 2, 3, 4, 5, 0);
 +        execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 2, 3, 4, 5, 0, 1);
 +        execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 3, 4, 5, 0, 1, 2);
 +        execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 4, 5, 0, 1, 2, 3);
 +        execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 5, 0, 1, 2, 3, 4);
 +
 +        assertRows(execute("SELECT value FROM %s WHERE pk0 = 2"), row(1));
 +        assertRows(execute("SELECT value FROM %s WHERE ck0 = 0"), row(3));
 +        assertRows(execute("SELECT value FROM %s WHERE pk0 = 3 AND pk1 = 4 AND ck1 = 0"), row(2));
 +        assertRows(execute("SELECT value FROM %s WHERE pk0 = 5 AND pk1 = 0 AND ck0 = 1 AND ck2 = 3 ALLOW FILTERING"), row(4));
 +    }
 +
 +    @Test
 +    public void testIndexOnClusteringColumns() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (id1 int, id2 int, author text, time bigint, v1 text, v2 text, PRIMARY KEY ((id1, id2), author, time))");
 +        createIndex("CREATE INDEX ON %s(time)");
 +        createIndex("CREATE INDEX ON %s(id2)");
 +
 +        execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 0, 'bob', 0, 'A', 'A')");
 +        execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 0, 'bob', 1, 'B', 'B')");
 +        execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 1, 'bob', 2, 'C', 'C')");
 +        execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 0, 'tom', 0, 'D', 'D')");
 +        execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 1, 'tom', 1, 'E', 'E')");
 +
 +        assertRows(execute("SELECT v1 FROM %s WHERE time = 1"), row("B"), row("E"));
 +
 +        assertRows(execute("SELECT v1 FROM %s WHERE id2 = 1"), row("C"), row("E"));
 +
 +        assertRows(execute("SELECT v1 FROM %s WHERE id1 = 0 AND id2 = 0 AND author = 'bob' AND time = 0"), row("A"));
 +
 +        // Test for CASSANDRA-8206
 +        execute("UPDATE %s SET v2 = null WHERE id1 = 0 AND id2 = 0 AND author = 'bob' AND time = 1");
 +
 +        assertRows(execute("SELECT v1 FROM %s WHERE id2 = 0"), row("A"), row("B"), row("D"));
 +
 +        assertRows(execute("SELECT v1 FROM %s WHERE time = 1"), row("B"), row("E"));
 +
 +        assertInvalidMessage("IN restrictions are not supported on indexed columns",
 +                             "SELECT v1 FROM %s WHERE id2 = 0 and time IN (1, 2) ALLOW FILTERING");
 +    }
 +
 +    @Test
 +    public void testCompositeIndexWithPrimaryKey() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (blog_id int, time1 int, time2 int, author text, content text, PRIMARY KEY (blog_id, time1, time2))");
 +
 +        createIndex("CREATE INDEX ON %s(author)");
 +
 +        String req = "INSERT INTO %s (blog_id, time1, time2, author, content) VALUES (?, ?, ?, ?, ?)";
 +        execute(req, 1, 0, 0, "foo", "bar1");
 +        execute(req, 1, 0, 1, "foo", "bar2");
 +        execute(req, 2, 1, 0, "foo", "baz");
 +        execute(req, 3, 0, 1, "gux", "qux");
 +
 +        assertRows(execute("SELECT blog_id, content FROM %s WHERE author='foo'"),
 +                   row(1, "bar1"),
 +                   row(1, "bar2"),
 +                   row(2, "baz"));
 +        assertRows(execute("SELECT blog_id, content FROM %s WHERE time1 > 0 AND author='foo' ALLOW FILTERING"), row(2, "baz"));
 +        assertRows(execute("SELECT blog_id, content FROM %s WHERE time1 = 1 AND author='foo' ALLOW FILTERING"), row(2, "baz"));
 +        assertRows(execute("SELECT blog_id, content FROM %s WHERE time1 = 1 AND time2 = 0 AND author='foo' ALLOW FILTERING"),
 +                   row(2, "baz"));
 +        assertEmpty(execute("SELECT content FROM %s WHERE time1 = 1 AND time2 = 1 AND author='foo' ALLOW FILTERING"));
 +        assertEmpty(execute("SELECT content FROM %s WHERE time1 = 1 AND time2 > 0 AND author='foo' ALLOW FILTERING"));
 +
 +        assertInvalidMessage("Cannot execute this query as it might involve data filtering",
 +                             "SELECT content FROM %s WHERE time2 >= 0 AND author='foo'");
 +    }
 +
 +    @Test
 +    public void testRangeQueryOnIndex() throws Throwable
 +    {
 +        createTable("CREATE TABLE %s (id int primary key, row int, setid int);");
 +        createIndex("CREATE INDEX ON %s (setid)");
 +
 +        String q = "INSERT INTO %s (id, row, setid) VALUES (?, ?, ?);";
 +        execute(q, 0, 0, 0);
 +        execute(q, 1, 1, 0);
 +        execute(q, 2, 2, 0);
 +        execute(q, 3, 3, 0);
 +
 +        assertInvalidMessage("Cannot execute this query as it might involve data filtering",
 +                             "SELECT * FROM %s WHERE setid = 0 AND row < 1;");
 +        assertRows(execute("SELECT * FROM %s WHERE setid = 0 AND row < 1 ALLOW FILTERING;"), row(0, 0, 0));
 +    }
 +
 +    @Test
 +    public void testEmptyIN() throws Throwable
 +    {
 +        for (String compactOption : new String[] { "", " WITH COMPACT STORAGE" })
 +        {
 +            createTable("CREATE TABLE %s (k1 int, k2 int, v int, PRIMARY KEY (k1, k2))" + compactOption);
 +
 +            for (int i = 0; i <= 2; i++)
 +                for (int j = 0; j <= 2; j++)
 +                    execute("INSERT INTO %s (k1, k2, v) VALUES (?, ?, ?)", i, j, i + j);
 +
 +            assertEmpty(execute("SELECT v FROM %s WHERE k1 IN ()"));
 +            assertEmpty(execute("SELECT v FROM %s WHERE k1 = 0 AND k2 IN ()"));
 +        }
 +    }
 +
 +    @Test
++    public void testINWithDuplicateValue() throws Throwable
++    {
++        for (String compactOption : new String[] { "", " WITH COMPACT STORAGE" })
++        {
++            createTable("CREATE TABLE %s (k1 int, k2 int, v int, PRIMARY KEY (k1, k2))" + compactOption);
++            execute("INSERT INTO %s (k1,  k2, v) VALUES (?, ?, ?)", 1, 1, 1);
++
++            assertRows(execute("SELECT * FROM %s WHERE k1 IN (?, ?)", 1, 1),
++                       row(1, 1, 1));
++
++            assertRows(execute("SELECT * FROM %s WHERE k1 IN (?, ?) AND k2 IN (?, ?)", 1, 1, 1, 1),
++                       row(1, 1, 1));
++
++            assertRows(execute("SELECT * FROM %s WHERE k1 = ? AND k2 IN (?, ?)", 1, 1, 1),
++                       row(1, 1, 1));
++        }
+     }
+ 
+     @Test
      public void testLargeClusteringINValues() throws Throwable
      {
          createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))");
@@@ -392,7 -61,7 +411,6 @@@
          for (int i = 0; i < 10000; i++)
              inValues.add(i);
          assertRows(execute("SELECT * FROM %s WHERE k=? AND c IN ?", 0, inValues),
--                row(0, 0, 0)
--        );
++                row(0, 0, 0));
      }
  }


[2/3] cassandra git commit: Duplicate rows returned when in clause has repeated values

Posted by sn...@apache.org.
Duplicate rows returned when in clause has repeated values

Patch by Benjamin Lerer, reviewed by Robert Stupp for CASSANDRA-6706


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/0c2eaa9c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/0c2eaa9c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/0c2eaa9c

Branch: refs/heads/trunk
Commit: 0c2eaa9cbb51f064b439c4d098adb5aa76b65b0f
Parents: faf9181
Author: Benjamin Lerer <b_...@hotmail.com>
Authored: Wed Jan 21 20:12:55 2015 +0100
Committer: Robert Stupp <sn...@snazy.de>
Committed: Wed Jan 21 20:12:55 2015 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cql3/statements/SelectStatement.java        | 25 ++++++++++++++++++++
 2 files changed, 26 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f1eaa77..9cd8189 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.3
+ * Duplicate rows returned when in clause has repeated values (CASSANDRA-6707)
  * Add tooling to detect hot partitions (CASSANDRA-7974)
  * Fix cassandra-stress user-mode truncation of partition generation (CASSANDRA-8608)
  * Only stream from unrepaired sstables during inc repair (CASSANDRA-8267)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 30259dd..633d43c 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -65,6 +65,13 @@ public class SelectStatement implements CQLStatement
 
     private static final int DEFAULT_COUNT_PAGE_SIZE = 10000;
 
+    /**
+     * In the current version a query containing duplicate values in an IN restriction on the partition key will
+     * cause the same record to be returned multiple time. This behavior will be changed in 3.0 but until then
+     * we will log a warning the first time this problem occurs.
+     */
+    private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES;
+
     private final int boundTerms;
     public final CFMetaData cfm;
     public final Parameters parameters;
@@ -588,6 +595,13 @@ public class SelectStatement implements CQLStatement
 
             if (builder.remainingCount() == 1)
             {
+                if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES  && containsDuplicates(values))
+                {
+                    // This approach does not fully prevent race conditions but it is not a big deal.
+                    HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true;
+                    logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows.");
+                }
+
                 for (ByteBuffer val : values)
                 {
                     if (val == null)
@@ -609,6 +623,17 @@ public class SelectStatement implements CQLStatement
         return keys;
     }
 
+    /**
+     * Checks if the specified list contains duplicate values.
+     *
+     * @param values the values to check
+     * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise.
+     */
+    private static boolean containsDuplicates(List<ByteBuffer> values)
+    {
+        return new HashSet<>(values).size() < values.size();
+    }
+
     private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException
     {
         // Deal with unrestricted partition key components (special-casing is required to deal with 2i queries on the first