You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sn...@apache.org on 2015/01/21 20:25:24 UTC
[1/3] cassandra git commit: Duplicate rows returned when in clause
has repeated values
Repository: cassandra
Updated Branches:
refs/heads/cassandra-2.1 faf91818b -> 0c2eaa9cb
refs/heads/trunk 0a09b87dc -> 732986bbd
Duplicate rows returned when in clause has repeated values
Patch by Benjamin Lerer, reviewed by Robert Stupp for CASSANDRA-6706
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/0c2eaa9c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/0c2eaa9c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/0c2eaa9c
Branch: refs/heads/cassandra-2.1
Commit: 0c2eaa9cbb51f064b439c4d098adb5aa76b65b0f
Parents: faf9181
Author: Benjamin Lerer <b_...@hotmail.com>
Authored: Wed Jan 21 20:12:55 2015 +0100
Committer: Robert Stupp <sn...@snazy.de>
Committed: Wed Jan 21 20:12:55 2015 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../cql3/statements/SelectStatement.java | 25 ++++++++++++++++++++
2 files changed, 26 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f1eaa77..9cd8189 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
2.1.3
+ * Duplicate rows returned when in clause has repeated values (CASSANDRA-6707)
* Add tooling to detect hot partitions (CASSANDRA-7974)
* Fix cassandra-stress user-mode truncation of partition generation (CASSANDRA-8608)
* Only stream from unrepaired sstables during inc repair (CASSANDRA-8267)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 30259dd..633d43c 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -65,6 +65,13 @@ public class SelectStatement implements CQLStatement
private static final int DEFAULT_COUNT_PAGE_SIZE = 10000;
+ /**
+ * In the current version a query containing duplicate values in an IN restriction on the partition key will
+ * cause the same record to be returned multiple time. This behavior will be changed in 3.0 but until then
+ * we will log a warning the first time this problem occurs.
+ */
+ private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES;
+
private final int boundTerms;
public final CFMetaData cfm;
public final Parameters parameters;
@@ -588,6 +595,13 @@ public class SelectStatement implements CQLStatement
if (builder.remainingCount() == 1)
{
+ if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES && containsDuplicates(values))
+ {
+ // This approach does not fully prevent race conditions but it is not a big deal.
+ HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true;
+ logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows.");
+ }
+
for (ByteBuffer val : values)
{
if (val == null)
@@ -609,6 +623,17 @@ public class SelectStatement implements CQLStatement
return keys;
}
+ /**
+ * Checks if the specified list contains duplicate values.
+ *
+ * @param values the values to check
+ * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise.
+ */
+ private static boolean containsDuplicates(List<ByteBuffer> values)
+ {
+ return new HashSet<>(values).size() < values.size();
+ }
+
private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException
{
// Deal with unrestricted partition key components (special-casing is required to deal with 2i queries on the first
[3/3] cassandra git commit: Merge branch 'cassandra-2.1' into trunk
Posted by sn...@apache.org.
Merge branch 'cassandra-2.1' into trunk
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/732986bb
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/732986bb
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/732986bb
Branch: refs/heads/trunk
Commit: 732986bbd02a5acafaf97de24ea555c8d426b873
Parents: 0a09b87 0c2eaa9
Author: Benjamin Lerer <b_...@hotmail.com>
Authored: Wed Jan 21 20:19:42 2015 +0100
Committer: Robert Stupp <sn...@snazy.de>
Committed: Wed Jan 21 20:19:42 2015 +0100
----------------------------------------------------------------------
CHANGES.txt | 3 ++-
.../cassandra/cql3/MultiColumnRelationTest.java | 15 +++++++++++++
.../cql3/SingleColumnRelationTest.java | 22 ++++++++++++++++++--
3 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/732986bb/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 9343d66,9cd8189..cc6478e
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,58 -1,5 +1,59 @@@
-2.1.3
+3.0
+ * Duplicate rows returned when in clause has repeated values (CASSANDRA-6707)
+ * Make CassandraException unchecked, extend RuntimeException (CASSANDRA-8560)
+ * Support direct buffer decompression for reads (CASSANDRA-8464)
+ * DirectByteBuffer compatible LZ4 methods (CASSANDRA-7039)
+ * Add role based access control (CASSANDRA-7653)
+ * Group sstables for anticompaction correctly (CASSANDRA-8578)
+ * Add ReadFailureException to native protocol, respond
+ immediately when replicas encounter errors while handling
+ a read request (CASSANDRA-7886)
+ * Switch CommitLogSegment from RandomAccessFile to nio (CASSANDRA-8308)
+ * Allow mixing token and partition key restrictions (CASSANDRA-7016)
+ * Support index key/value entries on map collections (CASSANDRA-8473)
+ * Modernize schema tables (CASSANDRA-8261)
+ * Support for user-defined aggregation functions (CASSANDRA-8053)
+ * Fix NPE in SelectStatement with empty IN values (CASSANDRA-8419)
+ * Refactor SelectStatement, return IN results in natural order instead
- of IN value list order (CASSANDRA-7981)
++ of IN value list order and ignore duplicate values in partition key IN restrictions (CASSANDRA-7981)
+ * Support UDTs, tuples, and collections in user-defined
+ functions (CASSANDRA-7563)
+ * Fix aggregate fn results on empty selection, result column name,
+ and cqlsh parsing (CASSANDRA-8229)
+ * Mark sstables as repaired after full repair (CASSANDRA-7586)
+ * Extend Descriptor to include a format value and refactor reader/writer
+ APIs (CASSANDRA-7443)
+ * Integrate JMH for microbenchmarks (CASSANDRA-8151)
+ * Keep sstable levels when bootstrapping (CASSANDRA-7460)
+ * Add Sigar library and perform basic OS settings check on startup (CASSANDRA-7838)
+ * Support for aggregation functions (CASSANDRA-4914)
+ * Remove cassandra-cli (CASSANDRA-7920)
+ * Accept dollar quoted strings in CQL (CASSANDRA-7769)
+ * Make assassinate a first class command (CASSANDRA-7935)
+ * Support IN clause on any clustering column (CASSANDRA-4762)
+ * Improve compaction logging (CASSANDRA-7818)
+ * Remove YamlFileNetworkTopologySnitch (CASSANDRA-7917)
+ * Do anticompaction in groups (CASSANDRA-6851)
+ * Support user-defined functions (CASSANDRA-7395, 7526, 7562, 7740, 7781, 7929,
+ 7924, 7812, 8063, 7813, 7708)
+ * Permit configurable timestamps with cassandra-stress (CASSANDRA-7416)
+ * Move sstable RandomAccessReader to nio2, which allows using the
+ FILE_SHARE_DELETE flag on Windows (CASSANDRA-4050)
+ * Remove CQL2 (CASSANDRA-5918)
+ * Add Thrift get_multi_slice call (CASSANDRA-6757)
+ * Optimize fetching multiple cells by name (CASSANDRA-6933)
+ * Allow compilation in java 8 (CASSANDRA-7028)
+ * Make incremental repair default (CASSANDRA-7250)
+ * Enable code coverage thru JaCoCo (CASSANDRA-7226)
+ * Switch external naming of 'column families' to 'tables' (CASSANDRA-4369)
+ * Shorten SSTable path (CASSANDRA-6962)
+ * Use unsafe mutations for most unit tests (CASSANDRA-6969)
+ * Fix race condition during calculation of pending ranges (CASSANDRA-7390)
+ * Fail on very large batch sizes (CASSANDRA-8011)
+ * Improve concurrency of repair (CASSANDRA-6455, 8208)
+
+
+2.1.3
* Add tooling to detect hot partitions (CASSANDRA-7974)
* Fix cassandra-stress user-mode truncation of partition generation (CASSANDRA-8608)
* Only stream from unrepaired sstables during inc repair (CASSANDRA-8267)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/732986bb/test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
index b178498,4c3ba2a..3e6e45b
--- a/test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
+++ b/test/unit/org/apache/cassandra/cql3/MultiColumnRelationTest.java
@@@ -583,81 -555,4 +583,96 @@@ public class MultiColumnRelationTest ex
assertEmpty(execute("SELECT * FROM %s WHERE a = ? AND (b, c) > (?, ?)", 0, 1, 0));
}
}
+
+ @Test
+ public void testMultipleClusteringWithIndex() throws Throwable
+ {
+ createTable("CREATE TABLE %s (a int, b int, c int, d int, e int, PRIMARY KEY (a, b, c, d))");
+ createIndex("CREATE INDEX ON %s (b)");
+ createIndex("CREATE INDEX ON %s (e)");
+
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 0, 0);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 0, 1);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 1, 2);
+
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 1, 0, 0, 0);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 1, 1, 0, 1);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 1, 1, 1, 2);
+
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 2, 0, 0, 0);
+
+ assertRows(execute("SELECT * FROM %s WHERE (b) = (?)", 1),
+ row(0, 1, 0, 0, 0),
+ row(0, 1, 1, 0, 1),
+ row(0, 1, 1, 1, 2));
+
+ assertRows(execute("SELECT * FROM %s WHERE (b, c) = (?, ?) ALLOW FILTERING", 1, 1),
+ row(0, 1, 1, 0, 1),
+ row(0, 1, 1, 1, 2));
+
+ assertRows(execute("SELECT * FROM %s WHERE (b, c) = (?, ?) AND e = ? ALLOW FILTERING", 1, 1, 2),
+ row(0, 1, 1, 1, 2));
+
+ assertRows(execute("SELECT * FROM %s WHERE (b) IN ((?)) AND e = ? ALLOW FILTERING", 1, 2),
+ row(0, 1, 1, 1, 2));
+
+ assertInvalidMessage("IN restrictions are not supported on indexed columns",
+ "SELECT * FROM %s WHERE (b) IN ((?), (?)) AND e = ? ALLOW FILTERING", 0, 1, 2);
+
+ assertInvalidMessage("IN restrictions are not supported on indexed columns",
+ "SELECT * FROM %s WHERE (b, c) IN ((?, ?)) AND e = ? ALLOW FILTERING", 0, 1, 2);
+
+ assertInvalidMessage("IN restrictions are not supported on indexed columns",
+ "SELECT * FROM %s WHERE (b, c) IN ((?, ?), (?, ?)) AND e = ? ALLOW FILTERING", 0, 1, 1, 1, 2);
+
+ assertInvalidMessage("Slice restrictions are not supported on indexed columns which are part of a multi column relation",
+ "SELECT * FROM %s WHERE (b) >= (?) AND e = ? ALLOW FILTERING", 1, 2);
+ }
+
+ @Test
+ public void testMultiplePartitionKeyAndMultiClusteringWithIndex() throws Throwable
+ {
+ createTable("CREATE TABLE %s (a int, b int, c int, d int, e int, PRIMARY KEY ((a, b), c, d, e))");
+ createIndex("CREATE INDEX ON %s (c)");
+
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 0, 0);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 1, 0);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 0, 1, 1);
+
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 0, 0);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 1, 0);
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 1, 1, 1);
+
+ execute("INSERT INTO %s (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)", 0, 0, 2, 0, 0);
+
+ assertRows(execute("SELECT * FROM %s WHERE a = ? AND (c) = (?) ALLOW FILTERING", 0, 1),
+ row(0, 0, 1, 0, 0),
+ row(0, 0, 1, 1, 0),
+ row(0, 0, 1, 1, 1));
+
+ assertRows(execute("SELECT * FROM %s WHERE a = ? AND (c, d) = (?, ?) ALLOW FILTERING", 0, 1, 1),
+ row(0, 0, 1, 1, 0),
+ row(0, 0, 1, 1, 1));
+
+ assertInvalidMessage("Partition key parts: b must be restricted as other parts are",
+ "SELECT * FROM %s WHERE a = ? AND (c, d) IN ((?, ?)) ALLOW FILTERING", 0, 1, 1);
+
+ assertInvalidMessage("Partition key parts: b must be restricted as other parts are",
+ "SELECT * FROM %s WHERE a = ? AND (c, d) >= (?, ?) ALLOW FILTERING", 0, 1, 1);
+ }
++
++ @Test
++ public void testINWithDuplicateValue() throws Throwable
++ {
++ for (String compactOption : new String[] { "", " WITH COMPACT STORAGE" })
++ {
++ createTable("CREATE TABLE %s (k1 int, k2 int, v int, PRIMARY KEY (k1, k2))" + compactOption);
++ execute("INSERT INTO %s (k1, k2, v) VALUES (?, ?, ?)", 1, 1, 1);
++
++ assertRows(execute("SELECT * FROM %s WHERE k1 IN (?, ?) AND (k2) IN ((?), (?))", 1, 1, 1, 2),
++ row(1, 1, 1));
++ assertRows(execute("SELECT * FROM %s WHERE k1 = ? AND (k2) IN ((?), (?))", 1, 1, 1),
++ row(1, 1, 1));
++ }
++ }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/732986bb/test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
index 0fd300b,2ad4bda..f505a04
--- a/test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
+++ b/test/unit/org/apache/cassandra/cql3/SingleColumnRelationTest.java
@@@ -50,340 -45,14 +50,359 @@@ public class SingleColumnRelationTest e
execute("INSERT INTO %s (a, b, c) VALUES (0, {0}, 0)");
// non-EQ operators
- assertInvalid("SELECT * FROM %s WHERE c = 0 AND b > ?", set(0));
- assertInvalid("SELECT * FROM %s WHERE c = 0 AND b >= ?", set(0));
- assertInvalid("SELECT * FROM %s WHERE c = 0 AND b < ?", set(0));
- assertInvalid("SELECT * FROM %s WHERE c = 0 AND b <= ?", set(0));
- assertInvalid("SELECT * FROM %s WHERE c = 0 AND b IN (?)", set(0));
+ assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '>' relation",
+ "SELECT * FROM %s WHERE c = 0 AND b > ?", set(0));
+ assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '>=' relation",
+ "SELECT * FROM %s WHERE c = 0 AND b >= ?", set(0));
+ assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '<' relation",
+ "SELECT * FROM %s WHERE c = 0 AND b < ?", set(0));
+ assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a '<=' relation",
+ "SELECT * FROM %s WHERE c = 0 AND b <= ?", set(0));
+ assertInvalidMessage("Collection column 'b' (set<int>) cannot be restricted by a 'IN' relation",
+ "SELECT * FROM %s WHERE c = 0 AND b IN (?)", set(0));
+ }
+
+ @Test
+ public void testClusteringColumnRelations() throws Throwable
+ {
+ createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key(a, b, c))");
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 5, 1);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 6, 2);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 7, 3);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 8, 4);
+
+ testSelectQueriesWithClusteringColumnRelations();
+ }
+
+ @Test
+ public void testClusteringColumnRelationsWithCompactStorage() throws Throwable
+ {
+ createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key(a, b, c)) WITH COMPACT STORAGE;");
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 5, 1);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 6, 2);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 7, 3);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 8, 4);
+
+ testSelectQueriesWithClusteringColumnRelations();
+ }
+
+ private void testSelectQueriesWithClusteringColumnRelations() throws Throwable
+ {
+ assertRows(execute("select * from %s where a in (?, ?)", "first", "second"),
+ row("first", 1, 5, 1),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3),
+ row("second", 4, 8, 4));
+
+ assertRows(execute("select * from %s where a = ? and b = ? and c in (?, ?)", "first", 2, 6, 7),
+ row("first", 2, 6, 2));
+
+ assertRows(execute("select * from %s where a = ? and b in (?, ?) and c in (?, ?)", "first", 2, 3, 6, 7),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3));
+
+ assertRows(execute("select * from %s where a = ? and b in (?, ?) and c in (?, ?)", "first", 3, 2, 7, 6),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3));
+
+ assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?)", "first", 7, 6, 3, 2),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3));
+
+ assertRows(execute("select c, d from %s where a = ? and c in (?, ?) and b in (?, ?)", "first", 7, 6, 3, 2),
+ row(6, 2),
+ row(7, 3));
+
+ assertRows(execute("select c, d from %s where a = ? and c in (?, ?) and b in (?, ?, ?)", "first", 7, 6, 3, 2, 3),
+ row(6, 2),
+ row(7, 3));
+
+ assertRows(execute("select * from %s where a = ? and b in (?, ?) and c = ?", "first", 3, 2, 7),
+ row("first", 3, 7, 3));
+
+ assertRows(execute("select * from %s where a = ? and b in ? and c in ?",
+ "first", Arrays.asList(3, 2), Arrays.asList(7, 6)),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3));
+
+ assertInvalidMessage("Invalid null value for IN restriction",
+ "select * from %s where a = ? and b in ? and c in ?", "first", null, Arrays.asList(7, 6));
+
+ assertRows(execute("select * from %s where a = ? and c >= ? and b in (?, ?)", "first", 6, 3, 2),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3));
+
+ assertRows(execute("select * from %s where a = ? and c > ? and b in (?, ?)", "first", 6, 3, 2),
+ row("first", 3, 7, 3));
+
+ assertRows(execute("select * from %s where a = ? and c <= ? and b in (?, ?)", "first", 6, 3, 2),
+ row("first", 2, 6, 2));
+
+ assertRows(execute("select * from %s where a = ? and c < ? and b in (?, ?)", "first", 7, 3, 2),
+ row("first", 2, 6, 2));
+//---
+ assertRows(execute("select * from %s where a = ? and c >= ? and c <= ? and b in (?, ?)", "first", 6, 7, 3, 2),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3));
+
+ assertRows(execute("select * from %s where a = ? and c > ? and c <= ? and b in (?, ?)", "first", 6, 7, 3, 2),
+ row("first", 3, 7, 3));
+
+ assertEmpty(execute("select * from %s where a = ? and c > ? and c < ? and b in (?, ?)", "first", 6, 7, 3, 2));
+
+ assertInvalidMessage("Column \"c\" cannot be restricted by both an equality and an inequality relation",
+ "select * from %s where a = ? and c > ? and c = ? and b in (?, ?)", "first", 6, 7, 3, 2);
+
+ assertInvalidMessage("c cannot be restricted by more than one relation if it includes an Equal",
+ "select * from %s where a = ? and c = ? and c > ? and b in (?, ?)", "first", 6, 7, 3, 2);
+
+ assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?) order by b DESC",
+ "first", 7, 6, 3, 2),
+ row("first", 3, 7, 3),
+ row("first", 2, 6, 2));
+
+ assertInvalidMessage("More than one restriction was found for the start bound on b",
+ "select * from %s where a = ? and b > ? and b > ?", "first", 6, 3, 2);
+
+ assertInvalidMessage("More than one restriction was found for the end bound on b",
+ "select * from %s where a = ? and b < ? and b <= ?", "first", 6, 3, 2);
+ }
+
+ @Test
+ public void testPartitionKeyColumnRelations() throws Throwable
+ {
+ createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key((a, b), c))");
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 1, 1);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 2, 2);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 3, 3);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 4, 4);
+
+ assertInvalidMessage("Partition KEY part a cannot be restricted by IN relation (only the last part of the partition key can)",
+ "select * from %s where a in (?, ?)", "first", "second");
+ assertInvalidMessage("Partition KEY part a cannot be restricted by IN relation (only the last part of the partition key can)",
+ "select * from %s where a in (?, ?) and b in (?, ?)", "first", "second", 2, 3);
+ assertInvalidMessage("Partition key parts: b must be restricted as other parts are",
+ "select * from %s where a = ?", "first");
+ assertInvalidMessage("b cannot be restricted by more than one relation if it includes a IN",
+ "select * from %s where a = ? AND b IN (?, ?) AND b = ?", "first", 2, 2, 3);
+ assertInvalidMessage("b cannot be restricted by more than one relation if it includes an Equal",
+ "select * from %s where a = ? AND b = ? AND b IN (?, ?)", "first", 2, 2, 3);
+ }
+
+ @Test
+ public void testClusteringColumnRelationsWithClusteringOrder() throws Throwable
+ {
+ createTable("CREATE TABLE %s (a text, b int, c int, d int, primary key(a, b, c)) WITH CLUSTERING ORDER BY (b DESC);");
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 1, 5, 1);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 2, 6, 2);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "first", 3, 7, 3);
+ execute("insert into %s (a, b, c, d) values (?, ?, ?, ?)", "second", 4, 8, 4);
+
+ assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?) order by b DESC",
+ "first", 7, 6, 3, 2),
+ row("first", 3, 7, 3),
+ row("first", 2, 6, 2));
+
+ assertRows(execute("select * from %s where a = ? and c in (?, ?) and b in (?, ?) order by b ASC",
+ "first", 7, 6, 3, 2),
+ row("first", 2, 6, 2),
+ row("first", 3, 7, 3));
+ }
+
+ @Test
+ public void testAllowFilteringWithClusteringColumn() throws Throwable
+ {
+ createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))");
+
+ execute("INSERT INTO %s (k, c, v) VALUES(?, ?, ?)", 1, 2, 1);
+ execute("INSERT INTO %s (k, c, v) VALUES(?, ?, ?)", 1, 3, 2);
+ execute("INSERT INTO %s (k, c, v) VALUES(?, ?, ?)", 2, 2, 3);
+
+ // Don't require filtering, always allowed
+ assertRows(execute("SELECT * FROM %s WHERE k = ?", 1),
+ row(1, 2, 1),
+ row(1, 3, 2));
+
+ assertRows(execute("SELECT * FROM %s WHERE k = ? AND c > ?", 1, 2), row(1, 3, 2));
+
+ assertRows(execute("SELECT * FROM %s WHERE k = ? AND c = ?", 1, 2), row(1, 2, 1));
+
+ assertRows(execute("SELECT * FROM %s WHERE k = ? ALLOW FILTERING", 1),
+ row(1, 2, 1),
+ row(1, 3, 2));
+
+ assertRows(execute("SELECT * FROM %s WHERE k = ? AND c > ? ALLOW FILTERING", 1, 2), row(1, 3, 2));
+
+ assertRows(execute("SELECT * FROM %s WHERE k = ? AND c = ? ALLOW FILTERING", 1, 2), row(1, 2, 1));
+
+ // Require filtering, allowed only with ALLOW FILTERING
+ assertInvalidMessage("Cannot execute this query as it might involve data filtering",
+ "SELECT * FROM %s WHERE c = ?", 2);
+ assertInvalidMessage("Cannot execute this query as it might involve data filtering",
+ "SELECT * FROM %s WHERE c > ? AND c <= ?", 2, 4);
+
+ assertRows(execute("SELECT * FROM %s WHERE c = ? ALLOW FILTERING", 2),
+ row(1, 2, 1),
+ row(2, 2, 3));
+
+ assertRows(execute("SELECT * FROM %s WHERE c > ? AND c <= ? ALLOW FILTERING", 2, 4), row(1, 3, 2));
+ }
+
+ @Test
+ public void testAllowFilteringWithIndexedColumn() throws Throwable
+ {
+ createTable("CREATE TABLE %s (k int PRIMARY KEY, a int, b int)");
+ createIndex("CREATE INDEX ON %s(a)");
+
+ execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 1, 10, 100);
+ execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 2, 20, 200);
+ execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 3, 30, 300);
+ execute("INSERT INTO %s(k, a, b) VALUES(?, ?, ?)", 4, 40, 400);
+
+ // Don't require filtering, always allowed
+ assertRows(execute("SELECT * FROM %s WHERE k = ?", 1), row(1, 10, 100));
+ assertRows(execute("SELECT * FROM %s WHERE a = ?", 20), row(2, 20, 200));
+ assertRows(execute("SELECT * FROM %s WHERE k = ? ALLOW FILTERING", 1), row(1, 10, 100));
+ assertRows(execute("SELECT * FROM %s WHERE a = ? ALLOW FILTERING", 20), row(2, 20, 200));
+
+ assertInvalid("SELECT * FROM %s WHERE a = ? AND b = ?");
+ assertRows(execute("SELECT * FROM %s WHERE a = ? AND b = ? ALLOW FILTERING", 20, 200), row(2, 20, 200));
+ }
+
+ @Test
+ public void testIndexQueriesOnComplexPrimaryKey() throws Throwable
+ {
+ createTable("CREATE TABLE %s (pk0 int, pk1 int, ck0 int, ck1 int, ck2 int, value int, PRIMARY KEY ((pk0, pk1), ck0, ck1, ck2))");
+
+ createIndex("CREATE INDEX ON %s (ck1)");
+ createIndex("CREATE INDEX ON %s (ck2)");
+ createIndex("CREATE INDEX ON %s (pk0)");
+ createIndex("CREATE INDEX ON %s (ck0)");
+
+ execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 0, 1, 2, 3, 4, 5);
+ execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 1, 2, 3, 4, 5, 0);
+ execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 2, 3, 4, 5, 0, 1);
+ execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 3, 4, 5, 0, 1, 2);
+ execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 4, 5, 0, 1, 2, 3);
+ execute("INSERT INTO %s (pk0, pk1, ck0, ck1, ck2, value) VALUES (?, ?, ?, ?, ?, ?)", 5, 0, 1, 2, 3, 4);
+
+ assertRows(execute("SELECT value FROM %s WHERE pk0 = 2"), row(1));
+ assertRows(execute("SELECT value FROM %s WHERE ck0 = 0"), row(3));
+ assertRows(execute("SELECT value FROM %s WHERE pk0 = 3 AND pk1 = 4 AND ck1 = 0"), row(2));
+ assertRows(execute("SELECT value FROM %s WHERE pk0 = 5 AND pk1 = 0 AND ck0 = 1 AND ck2 = 3 ALLOW FILTERING"), row(4));
+ }
+
+ @Test
+ public void testIndexOnClusteringColumns() throws Throwable
+ {
+ createTable("CREATE TABLE %s (id1 int, id2 int, author text, time bigint, v1 text, v2 text, PRIMARY KEY ((id1, id2), author, time))");
+ createIndex("CREATE INDEX ON %s(time)");
+ createIndex("CREATE INDEX ON %s(id2)");
+
+ execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 0, 'bob', 0, 'A', 'A')");
+ execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 0, 'bob', 1, 'B', 'B')");
+ execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 1, 'bob', 2, 'C', 'C')");
+ execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 0, 'tom', 0, 'D', 'D')");
+ execute("INSERT INTO %s(id1, id2, author, time, v1, v2) VALUES(0, 1, 'tom', 1, 'E', 'E')");
+
+ assertRows(execute("SELECT v1 FROM %s WHERE time = 1"), row("B"), row("E"));
+
+ assertRows(execute("SELECT v1 FROM %s WHERE id2 = 1"), row("C"), row("E"));
+
+ assertRows(execute("SELECT v1 FROM %s WHERE id1 = 0 AND id2 = 0 AND author = 'bob' AND time = 0"), row("A"));
+
+ // Test for CASSANDRA-8206
+ execute("UPDATE %s SET v2 = null WHERE id1 = 0 AND id2 = 0 AND author = 'bob' AND time = 1");
+
+ assertRows(execute("SELECT v1 FROM %s WHERE id2 = 0"), row("A"), row("B"), row("D"));
+
+ assertRows(execute("SELECT v1 FROM %s WHERE time = 1"), row("B"), row("E"));
+
+ assertInvalidMessage("IN restrictions are not supported on indexed columns",
+ "SELECT v1 FROM %s WHERE id2 = 0 and time IN (1, 2) ALLOW FILTERING");
+ }
+
+ @Test
+ public void testCompositeIndexWithPrimaryKey() throws Throwable
+ {
+ createTable("CREATE TABLE %s (blog_id int, time1 int, time2 int, author text, content text, PRIMARY KEY (blog_id, time1, time2))");
+
+ createIndex("CREATE INDEX ON %s(author)");
+
+ String req = "INSERT INTO %s (blog_id, time1, time2, author, content) VALUES (?, ?, ?, ?, ?)";
+ execute(req, 1, 0, 0, "foo", "bar1");
+ execute(req, 1, 0, 1, "foo", "bar2");
+ execute(req, 2, 1, 0, "foo", "baz");
+ execute(req, 3, 0, 1, "gux", "qux");
+
+ assertRows(execute("SELECT blog_id, content FROM %s WHERE author='foo'"),
+ row(1, "bar1"),
+ row(1, "bar2"),
+ row(2, "baz"));
+ assertRows(execute("SELECT blog_id, content FROM %s WHERE time1 > 0 AND author='foo' ALLOW FILTERING"), row(2, "baz"));
+ assertRows(execute("SELECT blog_id, content FROM %s WHERE time1 = 1 AND author='foo' ALLOW FILTERING"), row(2, "baz"));
+ assertRows(execute("SELECT blog_id, content FROM %s WHERE time1 = 1 AND time2 = 0 AND author='foo' ALLOW FILTERING"),
+ row(2, "baz"));
+ assertEmpty(execute("SELECT content FROM %s WHERE time1 = 1 AND time2 = 1 AND author='foo' ALLOW FILTERING"));
+ assertEmpty(execute("SELECT content FROM %s WHERE time1 = 1 AND time2 > 0 AND author='foo' ALLOW FILTERING"));
+
+ assertInvalidMessage("Cannot execute this query as it might involve data filtering",
+ "SELECT content FROM %s WHERE time2 >= 0 AND author='foo'");
+ }
+
+ @Test
+ public void testRangeQueryOnIndex() throws Throwable
+ {
+ createTable("CREATE TABLE %s (id int primary key, row int, setid int);");
+ createIndex("CREATE INDEX ON %s (setid)");
+
+ String q = "INSERT INTO %s (id, row, setid) VALUES (?, ?, ?);";
+ execute(q, 0, 0, 0);
+ execute(q, 1, 1, 0);
+ execute(q, 2, 2, 0);
+ execute(q, 3, 3, 0);
+
+ assertInvalidMessage("Cannot execute this query as it might involve data filtering",
+ "SELECT * FROM %s WHERE setid = 0 AND row < 1;");
+ assertRows(execute("SELECT * FROM %s WHERE setid = 0 AND row < 1 ALLOW FILTERING;"), row(0, 0, 0));
+ }
+
+ @Test
+ public void testEmptyIN() throws Throwable
+ {
+ for (String compactOption : new String[] { "", " WITH COMPACT STORAGE" })
+ {
+ createTable("CREATE TABLE %s (k1 int, k2 int, v int, PRIMARY KEY (k1, k2))" + compactOption);
+
+ for (int i = 0; i <= 2; i++)
+ for (int j = 0; j <= 2; j++)
+ execute("INSERT INTO %s (k1, k2, v) VALUES (?, ?, ?)", i, j, i + j);
+
+ assertEmpty(execute("SELECT v FROM %s WHERE k1 IN ()"));
+ assertEmpty(execute("SELECT v FROM %s WHERE k1 = 0 AND k2 IN ()"));
+ }
+ }
+
+ @Test
++ public void testINWithDuplicateValue() throws Throwable
++ {
++ for (String compactOption : new String[] { "", " WITH COMPACT STORAGE" })
++ {
++ createTable("CREATE TABLE %s (k1 int, k2 int, v int, PRIMARY KEY (k1, k2))" + compactOption);
++ execute("INSERT INTO %s (k1, k2, v) VALUES (?, ?, ?)", 1, 1, 1);
++
++ assertRows(execute("SELECT * FROM %s WHERE k1 IN (?, ?)", 1, 1),
++ row(1, 1, 1));
++
++ assertRows(execute("SELECT * FROM %s WHERE k1 IN (?, ?) AND k2 IN (?, ?)", 1, 1, 1, 1),
++ row(1, 1, 1));
++
++ assertRows(execute("SELECT * FROM %s WHERE k1 = ? AND k2 IN (?, ?)", 1, 1, 1),
++ row(1, 1, 1));
++ }
+ }
+
+ @Test
public void testLargeClusteringINValues() throws Throwable
{
createTable("CREATE TABLE %s (k int, c int, v int, PRIMARY KEY (k, c))");
@@@ -392,7 -61,7 +411,6 @@@
for (int i = 0; i < 10000; i++)
inValues.add(i);
assertRows(execute("SELECT * FROM %s WHERE k=? AND c IN ?", 0, inValues),
-- row(0, 0, 0)
-- );
++ row(0, 0, 0));
}
}
[2/3] cassandra git commit: Duplicate rows returned when in clause
has repeated values
Posted by sn...@apache.org.
Duplicate rows returned when in clause has repeated values
Patch by Benjamin Lerer, reviewed by Robert Stupp for CASSANDRA-6706
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/0c2eaa9c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/0c2eaa9c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/0c2eaa9c
Branch: refs/heads/trunk
Commit: 0c2eaa9cbb51f064b439c4d098adb5aa76b65b0f
Parents: faf9181
Author: Benjamin Lerer <b_...@hotmail.com>
Authored: Wed Jan 21 20:12:55 2015 +0100
Committer: Robert Stupp <sn...@snazy.de>
Committed: Wed Jan 21 20:12:55 2015 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../cql3/statements/SelectStatement.java | 25 ++++++++++++++++++++
2 files changed, 26 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f1eaa77..9cd8189 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
2.1.3
+ * Duplicate rows returned when in clause has repeated values (CASSANDRA-6707)
* Add tooling to detect hot partitions (CASSANDRA-7974)
* Fix cassandra-stress user-mode truncation of partition generation (CASSANDRA-8608)
* Only stream from unrepaired sstables during inc repair (CASSANDRA-8267)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 30259dd..633d43c 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -65,6 +65,13 @@ public class SelectStatement implements CQLStatement
private static final int DEFAULT_COUNT_PAGE_SIZE = 10000;
+ /**
+ * In the current version a query containing duplicate values in an IN restriction on the partition key will
+ * cause the same record to be returned multiple time. This behavior will be changed in 3.0 but until then
+ * we will log a warning the first time this problem occurs.
+ */
+ private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES;
+
private final int boundTerms;
public final CFMetaData cfm;
public final Parameters parameters;
@@ -588,6 +595,13 @@ public class SelectStatement implements CQLStatement
if (builder.remainingCount() == 1)
{
+ if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES && containsDuplicates(values))
+ {
+ // This approach does not fully prevent race conditions but it is not a big deal.
+ HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true;
+ logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows.");
+ }
+
for (ByteBuffer val : values)
{
if (val == null)
@@ -609,6 +623,17 @@ public class SelectStatement implements CQLStatement
return keys;
}
+ /**
+ * Checks if the specified list contains duplicate values.
+ *
+ * @param values the values to check
+ * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise.
+ */
+ private static boolean containsDuplicates(List<ByteBuffer> values)
+ {
+ return new HashSet<>(values).size() < values.size();
+ }
+
private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException
{
// Deal with unrestricted partition key components (special-casing is required to deal with 2i queries on the first