You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sn...@apache.org on 2015/01/21 20:25:25 UTC

[2/3] cassandra git commit: Duplicate rows returned when in clause has repeated values

Duplicate rows returned when in clause has repeated values

Patch by Benjamin Lerer, reviewed by Robert Stupp for CASSANDRA-6706


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/0c2eaa9c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/0c2eaa9c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/0c2eaa9c

Branch: refs/heads/trunk
Commit: 0c2eaa9cbb51f064b439c4d098adb5aa76b65b0f
Parents: faf9181
Author: Benjamin Lerer <b_...@hotmail.com>
Authored: Wed Jan 21 20:12:55 2015 +0100
Committer: Robert Stupp <sn...@snazy.de>
Committed: Wed Jan 21 20:12:55 2015 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cql3/statements/SelectStatement.java        | 25 ++++++++++++++++++++
 2 files changed, 26 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f1eaa77..9cd8189 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.3
+ * Duplicate rows returned when in clause has repeated values (CASSANDRA-6706)
  * Add tooling to detect hot partitions (CASSANDRA-7974)
  * Fix cassandra-stress user-mode truncation of partition generation (CASSANDRA-8608)
  * Only stream from unrepaired sstables during inc repair (CASSANDRA-8267)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/0c2eaa9c/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 30259dd..633d43c 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -65,6 +65,13 @@ public class SelectStatement implements CQLStatement
 
     private static final int DEFAULT_COUNT_PAGE_SIZE = 10000;
 
+    /**
+     * In the current version a query containing duplicate values in an IN restriction on the partition key will
+     * cause the same record to be returned multiple times. This behavior will be changed in 3.0 but until then
+     * we will log a warning the first time this problem occurs.
+     */
+    private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES;
+
     private final int boundTerms;
     public final CFMetaData cfm;
     public final Parameters parameters;
@@ -588,6 +595,13 @@ public class SelectStatement implements CQLStatement
 
             if (builder.remainingCount() == 1)
             {
+                if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES  && containsDuplicates(values))
+                {
+                    // The check-then-set on the flag is not atomic, so concurrent queries may log the warning more than once; that is harmless.
+                    HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true;
+                    logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows.");
+                }
+
                 for (ByteBuffer val : values)
                 {
                     if (val == null)
@@ -609,6 +623,17 @@ public class SelectStatement implements CQLStatement
         return keys;
     }
 
+    /**
+     * Checks if the specified list contains duplicate values.
+     *
+     * @param values the values to check
+     * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise.
+     */
+    private static boolean containsDuplicates(List<ByteBuffer> values)
+    {
+        return new HashSet<>(values).size() < values.size();
+    }
+
     private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException
     {
         // Deal with unrestricted partition key components (special-casing is required to deal with 2i queries on the first