You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by bd...@apache.org on 2018/11/06 19:25:27 UTC

[2/6] cassandra git commit: Sstable min/max metadata can cause data loss

Sstable min/max metadata can cause data loss

Patch by Blake Eggleston; Reviewed by Sam Tunnicliffe for CASSANDRA-14861


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/d60c7835
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/d60c7835
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/d60c7835

Branch: refs/heads/cassandra-3.11
Commit: d60c78358b6f599a83f3c112bfd6ce72c1129c9f
Parents: e4bac44
Author: Blake Eggleston <bd...@gmail.com>
Authored: Wed Oct 31 15:55:48 2018 -0700
Committer: Blake Eggleston <bd...@gmail.com>
Committed: Tue Nov 6 11:17:06 2018 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 src/java/org/apache/cassandra/db/Slice.java     |  25 +-----
 .../cassandra/io/sstable/format/Version.java    |   2 +
 .../io/sstable/format/big/BigFormat.java        |   9 ++
 .../io/sstable/metadata/MetadataCollector.java  |  26 ++----
 .../io/sstable/metadata/StatsMetadata.java      |  14 ++-
 .../mc-1-big-CompressionInfo.db                 | Bin 0 -> 43 bytes
 .../mc-1-big-Data.db                            | Bin 0 -> 65 bytes
 .../mc-1-big-Digest.crc32                       |   1 +
 .../mc-1-big-Filter.db                          | Bin 0 -> 16 bytes
 .../mc-1-big-Index.db                           | Bin 0 -> 8 bytes
 .../mc-1-big-Statistics.db                      | Bin 0 -> 4789 bytes
 .../mc-1-big-Summary.db                         | Bin 0 -> 56 bytes
 .../mc-1-big-TOC.txt                            |   8 ++
 .../db/SinglePartitionSliceCommandTest.java     |  87 +++++++++++++++++++
 .../cassandra/io/sstable/LegacySSTableTest.java |  35 +++++++-
 16 files changed, 165 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index cc8e348..0fb1b86 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.18
+ * Sstable min/max metadata can cause data loss (CASSANDRA-14861)
  * Dropped columns can cause reverse sstable iteration to return prematurely (CASSANDRA-14838)
  * Legacy sstables with  multi block range tombstones create invalid bound sequences (CASSANDRA-14823)
  * Expand range tombstone validation checks to multiple interim request stages (CASSANDRA-14824)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/src/java/org/apache/cassandra/db/Slice.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/Slice.java b/src/java/org/apache/cassandra/db/Slice.java
index 3c645dc..f90c195 100644
--- a/src/java/org/apache/cassandra/db/Slice.java
+++ b/src/java/org/apache/cassandra/db/Slice.java
@@ -248,29 +248,8 @@ public class Slice
      */
     public boolean intersects(ClusteringComparator comparator, List<ByteBuffer> minClusteringValues, List<ByteBuffer> maxClusteringValues)
     {
-        // If this slice start after max or end before min, it can't intersect
-        if (start.compareTo(comparator, maxClusteringValues) > 0 || end.compareTo(comparator, minClusteringValues) < 0)
-            return false;
-
-        // We could safely return true here, but there's a minor optimization: if the first component
-        // of the slice is restricted to a single value (typically the slice is [4:5, 4:7]), we can
-        // check that the second component falls within the min/max for that component (and repeat for
-        // all components).
-        for (int j = 0; j < minClusteringValues.size() && j < maxClusteringValues.size(); j++)
-        {
-            ByteBuffer s = j < start.size() ? start.get(j) : null;
-            ByteBuffer f = j < end.size() ? end.get(j) : null;
-
-            // we already know the first component falls within its min/max range (otherwise we wouldn't get here)
-            if (j > 0 && (j < end.size() && comparator.compareComponent(j, f, minClusteringValues.get(j)) < 0 ||
-                        j < start.size() && comparator.compareComponent(j, s, maxClusteringValues.get(j)) > 0))
-                return false;
-
-            // if this component isn't equal in the start and finish, we don't need to check any more
-            if (j >= start.size() || j >= end.size() || comparator.compareComponent(j, s, f) != 0)
-                break;
-        }
-        return true;
+        // If this slice starts after max clustering or ends before min clustering, it can't intersect
+        return start.compareTo(comparator, maxClusteringValues) <= 0 && end.compareTo(comparator, minClusteringValues) >= 0;
     }
 
     public String toString(CFMetaData metadata)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/src/java/org/apache/cassandra/io/sstable/format/Version.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/format/Version.java b/src/java/org/apache/cassandra/io/sstable/format/Version.java
index 96c5a6e..2b9dcbd 100644
--- a/src/java/org/apache/cassandra/io/sstable/format/Version.java
+++ b/src/java/org/apache/cassandra/io/sstable/format/Version.java
@@ -74,6 +74,8 @@ public abstract class Version
 
     public abstract boolean hasCommitLogIntervals();
 
+    public abstract boolean hasAccurateMinMax();
+
     public String getVersion()
     {
         return version;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/src/java/org/apache/cassandra/io/sstable/format/big/BigFormat.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/format/big/BigFormat.java b/src/java/org/apache/cassandra/io/sstable/format/big/BigFormat.java
index 16f0beb..d4549dd 100644
--- a/src/java/org/apache/cassandra/io/sstable/format/big/BigFormat.java
+++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigFormat.java
@@ -126,6 +126,7 @@ public class BigFormat implements SSTableFormat
         //             store rows natively
         // mb (3.0.7, 3.7): commit log lower bound included
         // mc (3.0.8, 3.9): commit log intervals included
+        // md (3.0.18, 3.11.4): corrected sstable min/max clustering
         //
         // NOTE: when adding a new version, please add that to LegacySSTableTest, too.
 
@@ -147,6 +148,7 @@ public class BigFormat implements SSTableFormat
         private final boolean hasOldBfHashOrder;
         private final boolean hasCommitLogLowerBound;
         private final boolean hasCommitLogIntervals;
+        private final boolean hasAccurateMinMax;
 
         /**
          * CASSANDRA-7066: compaction ancerstors are no longer used and have been removed.
@@ -189,6 +191,7 @@ public class BigFormat implements SSTableFormat
             hasCommitLogLowerBound = (version.compareTo("lb") >= 0 && version.compareTo("ma") < 0)
                                      || version.compareTo("mb") >= 0;
             hasCommitLogIntervals = version.compareTo("mc") >= 0;
+            hasAccurateMinMax = version.compareTo("md") >= 0;
         }
 
         @Override
@@ -264,6 +267,12 @@ public class BigFormat implements SSTableFormat
         }
 
         @Override
+        public boolean hasAccurateMinMax()
+        {
+            return hasAccurateMinMax;
+        }
+
+        @Override
         public boolean storeRows()
         {
             return storeRows;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
index 487a932..f48d0a6 100644
--- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
+++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
@@ -17,15 +17,14 @@
  */
 package org.apache.cassandra.io.sstable.metadata;
 
-import java.io.File;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 
 import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
@@ -96,8 +95,8 @@ public class MetadataCollector implements PartitionStatisticsCollector
     protected double compressionRatio = NO_COMPRESSION_RATIO;
     protected StreamingHistogram.StreamingHistogramBuilder estimatedTombstoneDropTime = defaultTombstoneDropTimeHistogramBuilder();
     protected int sstableLevel;
-    protected ByteBuffer[] minClusteringValues;
-    protected ByteBuffer[] maxClusteringValues;
+    private ClusteringPrefix minClustering = Slice.Bound.TOP;
+    private ClusteringPrefix maxClustering = Slice.Bound.BOTTOM;
     protected boolean hasLegacyCounterShards = false;
     protected long totalColumnsSet;
     protected long totalRows;
@@ -115,8 +114,6 @@ public class MetadataCollector implements PartitionStatisticsCollector
     {
         this.comparator = comparator;
 
-        this.minClusteringValues = new ByteBuffer[comparator.size()];
-        this.maxClusteringValues = new ByteBuffer[comparator.size()];
     }
 
     public MetadataCollector(Iterable<SSTableReader> sstables, ClusteringComparator comparator, int level)
@@ -231,14 +228,8 @@ public class MetadataCollector implements PartitionStatisticsCollector
 
     public MetadataCollector updateClusteringValues(ClusteringPrefix clustering)
     {
-        int size = clustering.size();
-        for (int i = 0; i < size; i++)
-        {
-            AbstractType<?> type = comparator.subtype(i);
-            ByteBuffer newValue = clustering.get(i);
-            minClusteringValues[i] = maybeMinimize(min(minClusteringValues[i], newValue, type));
-            maxClusteringValues[i] = maybeMinimize(max(maxClusteringValues[i], newValue, type));
-        }
+        minClustering = comparator.compare(clustering, minClustering) < 0 ? clustering : minClustering;
+        maxClustering = comparator.compare(clustering, maxClustering) > 0 ? clustering : maxClustering;
         return this;
     }
 
@@ -280,6 +271,7 @@ public class MetadataCollector implements PartitionStatisticsCollector
 
     public Map<MetadataType, MetadataComponent> finalizeMetadata(String partitioner, double bloomFilterFPChance, long repairedAt, SerializationHeader header)
     {
+        Preconditions.checkState(comparator.compare(maxClustering, minClustering) >= 0);
         Map<MetadataType, MetadataComponent> components = Maps.newHashMap();
         components.put(MetadataType.VALIDATION, new ValidationMetadata(partitioner, bloomFilterFPChance));
         components.put(MetadataType.STATS, new StatsMetadata(estimatedPartitionSize,
@@ -294,8 +286,8 @@ public class MetadataCollector implements PartitionStatisticsCollector
                                                              compressionRatio,
                                                              estimatedTombstoneDropTime.build(),
                                                              sstableLevel,
-                                                             makeList(minClusteringValues),
-                                                             makeList(maxClusteringValues),
+                                                             makeList(minClustering.getRawValues()),
+                                                             makeList(maxClustering.getRawValues()),
                                                              hasLegacyCounterShards,
                                                              repairedAt,
                                                              totalColumnsSet,

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java b/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java
index 9971eaa..1994bca 100644
--- a/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java
+++ b/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java
@@ -319,15 +319,25 @@ public class StatsMetadata extends MetadataComponent
             if (version.hasRepairedAt())
                 repairedAt = in.readLong();
 
+            // for legacy sstables, we still read the min and max clustering values (to advance the stream) but discard
+            // them, to prevent erroneously excluding sstables from reads (see CASSANDRA-14861)
             int colCount = in.readInt();
             List<ByteBuffer> minClusteringValues = new ArrayList<>(colCount);
             for (int i = 0; i < colCount; i++)
-                minClusteringValues.add(ByteBufferUtil.readWithShortLength(in));
+            {
+                ByteBuffer val = ByteBufferUtil.readWithShortLength(in);
+                if (version.hasAccurateMinMax())
+                    minClusteringValues.add(val);
+            }
 
             colCount = in.readInt();
             List<ByteBuffer> maxClusteringValues = new ArrayList<>(colCount);
             for (int i = 0; i < colCount; i++)
-                maxClusteringValues.add(ByteBufferUtil.readWithShortLength(in));
+            {
+                ByteBuffer val = ByteBufferUtil.readWithShortLength(in);
+                if (version.hasAccurateMinMax())
+                    maxClusteringValues.add(val);
+            }
 
             boolean hasLegacyCounterShards = true;
             if (version.tracksLegacyCounterShards())

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-CompressionInfo.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-CompressionInfo.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-CompressionInfo.db
new file mode 100644
index 0000000..df694ed
Binary files /dev/null and b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-CompressionInfo.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Data.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Data.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Data.db
new file mode 100644
index 0000000..e3e3637
Binary files /dev/null and b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Data.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Digest.crc32
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Digest.crc32 b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Digest.crc32
new file mode 100644
index 0000000..394acb4
--- /dev/null
+++ b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Digest.crc32
@@ -0,0 +1 @@
+4091794686
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Filter.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Filter.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Filter.db
new file mode 100644
index 0000000..b58e394
Binary files /dev/null and b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Filter.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Index.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Index.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Index.db
new file mode 100644
index 0000000..e27f0f6
Binary files /dev/null and b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Index.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Statistics.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Statistics.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Statistics.db
new file mode 100644
index 0000000..491277f
Binary files /dev/null and b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Statistics.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Summary.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Summary.db b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Summary.db
new file mode 100644
index 0000000..7756279
Binary files /dev/null and b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-Summary.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-TOC.txt
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-TOC.txt b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-TOC.txt
new file mode 100644
index 0000000..52b155b
--- /dev/null
+++ b/test/data/legacy-sstables/mc/legacy_tables/legacy_mc_inaccurate_min_max/mc-1-big-TOC.txt
@@ -0,0 +1,8 @@
+Digest.crc32
+CompressionInfo.db
+TOC.txt
+Summary.db
+Statistics.db
+Index.db
+Data.db
+Filter.db

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/unit/org/apache/cassandra/db/SinglePartitionSliceCommandTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/SinglePartitionSliceCommandTest.java b/test/unit/org/apache/cassandra/db/SinglePartitionSliceCommandTest.java
index b1a374f..2891687 100644
--- a/test/unit/org/apache/cassandra/db/SinglePartitionSliceCommandTest.java
+++ b/test/unit/org/apache/cassandra/db/SinglePartitionSliceCommandTest.java
@@ -24,8 +24,14 @@ import static org.junit.Assert.*;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.collect.Iterables;
+import com.google.common.primitives.Ints;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -39,8 +45,10 @@ import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.config.ColumnDefinition;
 import org.apache.cassandra.config.Schema;
 import org.apache.cassandra.cql3.ColumnIdentifier;
+import org.apache.cassandra.cql3.QueryOptions;
 import org.apache.cassandra.cql3.QueryProcessor;
 import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.cql3.statements.SelectStatement;
 import org.apache.cassandra.db.filter.AbstractClusteringIndexFilter;
 import org.apache.cassandra.db.filter.ClusteringIndexNamesFilter;
 import org.apache.cassandra.db.filter.ClusteringIndexSliceFilter;
@@ -50,6 +58,7 @@ import org.apache.cassandra.db.filter.RowFilter;
 import org.apache.cassandra.db.marshal.Int32Type;
 import org.apache.cassandra.db.marshal.IntegerType;
 import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
 import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
 import org.apache.cassandra.db.rows.Cell;
 import org.apache.cassandra.db.rows.RangeTombstoneMarker;
@@ -62,6 +71,7 @@ import org.apache.cassandra.io.util.DataInputPlus;
 import org.apache.cassandra.io.util.DataOutputBuffer;
 import org.apache.cassandra.net.MessagingService;
 import org.apache.cassandra.schema.KeyspaceParams;
+import org.apache.cassandra.service.ClientState;
 import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.FBUtilities;
 import org.apache.cassandra.utils.btree.BTreeSet;
@@ -378,4 +388,81 @@ public class SinglePartitionSliceCommandTest
         Assert.assertNotNull(ret);
         Assert.assertFalse(ret.isEmpty());
     }
+
+
+    public static List<Unfiltered> getUnfilteredsFromSinglePartition(String q)
+    {
+        SelectStatement stmt = (SelectStatement) QueryProcessor.parseStatement(q).prepare(ClientState.forInternalCalls()).statement;
+
+        List<Unfiltered> unfiltereds = new ArrayList<>();
+        SinglePartitionReadCommand.Group query = (SinglePartitionReadCommand.Group) stmt.getQuery(QueryOptions.DEFAULT, FBUtilities.nowInSeconds());
+        Assert.assertEquals(1, query.commands.size());
+        SinglePartitionReadCommand command = Iterables.getOnlyElement(query.commands);
+        try (ReadOrderGroup group = ReadOrderGroup.forCommand(command);
+             UnfilteredPartitionIterator partitions = command.executeLocally(group))
+        {
+            assert partitions.hasNext();
+            try (UnfilteredRowIterator partition = partitions.next())
+            {
+                while (partition.hasNext())
+                {
+                    Unfiltered next = partition.next();
+                    unfiltereds.add(next);
+                }
+            }
+            assert !partitions.hasNext();
+        }
+        return unfiltereds;
+    }
+
+    private static void assertQueryReturnsSingleRT(String query)
+    {
+        List<Unfiltered> unfiltereds = getUnfilteredsFromSinglePartition(query);
+        Assert.assertEquals(2, unfiltereds.size());
+        Assert.assertTrue(unfiltereds.get(0).isRangeTombstoneMarker());
+        Assert.assertTrue(((RangeTombstoneMarker) unfiltereds.get(0)).isOpen(false));
+        Assert.assertTrue(unfiltereds.get(1).isRangeTombstoneMarker());
+        Assert.assertTrue(((RangeTombstoneMarker) unfiltereds.get(1)).isClose(false));
+    }
+
+    private static ByteBuffer bb(int v)
+    {
+        return Int32Type.instance.decompose(v);
+    }
+
+    /**
+     * Tests the bug raised in CASSANDRA-14861, where inaccurate sstable min/max clustering metadata can cause
+     * an sstable to be excluded from a read, dropping range tombstones for clustering ranges not also covered by rows.
+     */
+    @Test
+    public void sstableFiltering()
+    {
+        QueryProcessor.executeOnceInternal("CREATE TABLE ks.legacy_mc_inaccurate_min_max (k int, c1 int, c2 int, c3 int, v int, primary key (k, c1, c2, c3))");
+        CFMetaData metadata = Schema.instance.getCFMetaData("ks", "legacy_mc_inaccurate_min_max");
+        ColumnFamilyStore cfs = Schema.instance.getColumnFamilyStoreInstance(metadata.cfId);
+
+        QueryProcessor.executeOnceInternal("INSERT INTO ks.legacy_mc_inaccurate_min_max (k, c1, c2, c3, v) VALUES (100, 2, 2, 2, 2)");
+        QueryProcessor.executeOnceInternal("DELETE FROM ks.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=1");
+        assertQueryReturnsSingleRT("SELECT * FROM ks.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=1 AND c2=1");
+        cfs.forceBlockingFlush();
+        assertQueryReturnsSingleRT("SELECT * FROM ks.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=1 AND c2=1");
+
+        assertQueryReturnsSingleRT("SELECT * FROM ks.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=1 AND c2=1 AND c3=1"); // clustering names
+
+        cfs.truncateBlocking();
+
+        long nowMillis = System.currentTimeMillis();
+        Slice slice = Slice.make(new Clustering(bb(2), bb(3)), new Clustering(bb(10), bb(10)));
+        RangeTombstone rt = new RangeTombstone(slice, new DeletionTime(TimeUnit.MILLISECONDS.toMicros(nowMillis),
+                                                                       Ints.checkedCast(TimeUnit.MILLISECONDS.toSeconds(nowMillis))));
+        PartitionUpdate update = new PartitionUpdate(cfs.metadata, bb(100), cfs.metadata.partitionColumns(), 1);
+        update.add(rt);
+        new Mutation(update).apply();
+
+        assertQueryReturnsSingleRT("SELECT * FROM ks.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=3 AND c2=2");
+        cfs.forceBlockingFlush();
+        assertQueryReturnsSingleRT("SELECT * FROM ks.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=3 AND c2=2");
+        assertQueryReturnsSingleRT("SELECT * FROM ks.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=3 AND c2=2 AND c3=2"); // clustering names
+
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d60c7835/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java b/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
index d58ce8a..5c65b31 100644
--- a/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
 
+import com.google.common.collect.Iterables;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -35,11 +36,20 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.cassandra.SchemaLoader;
+import org.apache.cassandra.cql3.QueryOptions;
 import org.apache.cassandra.cql3.QueryProcessor;
 import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.cql3.statements.SelectStatement;
 import org.apache.cassandra.db.ColumnFamilyStore;
 import org.apache.cassandra.db.Keyspace;
+import org.apache.cassandra.db.ReadOrderGroup;
+import org.apache.cassandra.db.SinglePartitionReadCommand;
+import org.apache.cassandra.db.SinglePartitionSliceCommandTest;
 import org.apache.cassandra.db.compaction.Verifier;
+import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
+import org.apache.cassandra.db.rows.RangeTombstoneMarker;
+import org.apache.cassandra.db.rows.Unfiltered;
+import org.apache.cassandra.db.rows.UnfilteredRowIterator;
 import org.apache.cassandra.dht.IPartitioner;
 import org.apache.cassandra.dht.Range;
 import org.apache.cassandra.dht.Token;
@@ -49,6 +59,7 @@ import org.apache.cassandra.io.sstable.format.SSTableReader;
 import org.apache.cassandra.io.sstable.format.Version;
 import org.apache.cassandra.io.sstable.format.big.BigFormat;
 import org.apache.cassandra.service.CacheService;
+import org.apache.cassandra.service.ClientState;
 import org.apache.cassandra.service.StorageService;
 import org.apache.cassandra.streaming.StreamPlan;
 import org.apache.cassandra.streaming.StreamSession;
@@ -257,6 +268,28 @@ public class LegacySSTableTest
         }
     }
 
+
+    @Test
+    public void testInaccurateSSTableMinMax() throws Exception
+    {
+        QueryProcessor.executeInternal("CREATE TABLE legacy_tables.legacy_mc_inaccurate_min_max (k int, c1 int, c2 int, c3 int, v int, primary key (k, c1, c2, c3))");
+        loadLegacyTable("legacy_%s_inaccurate_min_max%s", "mc", "");
+
+        /*
+         sstable has the following mutations:
+            INSERT INTO legacy_tables.legacy_mc_inaccurate_min_max (k, c1, c2, c3, v) VALUES (100, 4, 4, 4, 4)
+            DELETE FROM legacy_tables.legacy_mc_inaccurate_min_max WHERE k=100 AND c1<3
+         */
+
+        String query = "SELECT * FROM legacy_tables.legacy_mc_inaccurate_min_max WHERE k=100 AND c1=1 AND c2=1";
+        List<Unfiltered> unfiltereds = SinglePartitionSliceCommandTest.getUnfilteredsFromSinglePartition(query);
+        Assert.assertEquals(2, unfiltereds.size());
+        Assert.assertTrue(unfiltereds.get(0).isRangeTombstoneMarker());
+        Assert.assertTrue(((RangeTombstoneMarker) unfiltereds.get(0)).isOpen(false));
+        Assert.assertTrue(unfiltereds.get(1).isRangeTombstoneMarker());
+        Assert.assertTrue(((RangeTombstoneMarker) unfiltereds.get(1)).isClose(false));
+    }
+
     @Test
     public void testVerifyOldSSTables() throws Exception
     {
@@ -524,7 +557,7 @@ public class LegacySSTableTest
         }
     }
 
-    private void copySstablesFromTestData(String table, File ksDir) throws IOException
+    public static void copySstablesFromTestData(String table, File ksDir) throws IOException
     {
         File cfDir = new File(ksDir, table);
         cfDir.mkdir();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org