You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by cl...@apache.org on 2020/09/24 16:06:58 UTC
[cassandra] branch cassandra-3.11 updated: Avoid failing compactions with very large partitions

This is an automated email from the ASF dual-hosted git repository.

clohfink pushed a commit to branch cassandra-3.11
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-3.11 by this push:
     new 0361d53  Avoid failing compactions with very large partitions
0361d53 is described below

commit 0361d53f4c9d350654fbf733cfdb208d17922027
Author: Caleb Rackliffe <ca...@gmail.com>
AuthorDate: Thu Sep 24 10:56:14 2020 -0500

    Avoid failing compactions with very large partitions
    
    Patch by Caleb Rackliffe; Reviewed by Chris Lohfink and Benjamin Lerer for CASSANDRA-15164
---
 CHANGES.txt                                        |  1 +
 .../io/sstable/metadata/MetadataCollector.java     |  4 +--
 .../io/sstable/metadata/StatsMetadata.java         | 25 +++++++++++++++++
 .../apache/cassandra/utils/EstimatedHistogram.java | 31 +++++++++++++++++++---
 .../sstable/metadata/MetadataSerializerTest.java   | 29 ++++++++++++++++++++
 .../cassandra/utils/EstimatedHistogramTest.java    | 12 +++++++++
 .../apache/cassandra/utils/SerializationsTest.java |  2 +-
 7 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 99083b1..b5b0cbc 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.11.9
+ * Avoid failing compactions with very large partitions (CASSANDRA-15164)
  * Make sure LCS handles duplicate sstable added/removed notifications correctly (CASSANDRA-14103)
 
 3.11.8
diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
index ea88a3f..3354e2b 100644
--- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
+++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
@@ -51,8 +51,8 @@ public class MetadataCollector implements PartitionStatisticsCollector
 
     static EstimatedHistogram defaultCellPerPartitionCountHistogram()
     {
-        // EH of 114 can track a max value of 2395318855, i.e., > 2B columns
-        return new EstimatedHistogram(114);
+        // EH of 118 can track a max value of 4139110981, i.e., > 4B cells
+        return new EstimatedHistogram(118);
     }
 
     static EstimatedHistogram defaultPartitionSizeHistogram()
diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java b/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java
index 94e8d41..042e6d3 100644
--- a/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java
+++ b/src/java/org/apache/cassandra/io/sstable/metadata/StatsMetadata.java
@@ -26,6 +26,9 @@ import org.apache.cassandra.io.ISerializer;
 import org.apache.cassandra.io.sstable.format.Version;
 import org.apache.commons.lang3.builder.EqualsBuilder;
 import org.apache.commons.lang3.builder.HashCodeBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.cassandra.db.TypeSizes;
 import org.apache.cassandra.db.commitlog.CommitLogPosition;
 import org.apache.cassandra.db.commitlog.IntervalSet;
@@ -230,6 +233,8 @@ public class StatsMetadata extends MetadataComponent
 
     public static class StatsMetadataSerializer implements IMetadataComponentSerializer<StatsMetadata>
     {
+        private static final Logger logger = LoggerFactory.getLogger(StatsMetadataSerializer.class);
+
         public int serializedSize(Version version, StatsMetadata component) throws IOException
         {
             int size = 0;
@@ -302,9 +307,29 @@ public class StatsMetadata extends MetadataComponent
         public StatsMetadata deserialize(Version version, DataInputPlus in) throws IOException
         {
             EstimatedHistogram partitionSizes = EstimatedHistogram.serializer.deserialize(in);
+
+            if (partitionSizes.isOverflowed())
+            {
+                logger.warn("Deserialized partition size histogram with {} values greater than the maximum of {}. " +
+                            "Clearing the overflow bucket to allow for degraded mean and percentile calculations...",
+                            partitionSizes.overflowCount(), partitionSizes.getLargestBucketOffset());
+
+                partitionSizes.clearOverflow();
+            }
+
             EstimatedHistogram columnCounts = EstimatedHistogram.serializer.deserialize(in);
+            if (columnCounts.isOverflowed())
+            {
+                logger.warn("Deserialized partition cell count histogram with {} values greater than the maximum of {}. " +
+                            "Clearing the overflow bucket to allow for degraded mean and percentile calculations...",
+                            columnCounts.overflowCount(), columnCounts.getLargestBucketOffset());
+
+                columnCounts.clearOverflow();
+            }
+
             CommitLogPosition commitLogLowerBound = CommitLogPosition.NONE, commitLogUpperBound;
             commitLogUpperBound = CommitLogPosition.serializer.deserialize(in);
+
             long minTimestamp = in.readLong();
             long maxTimestamp = in.readLong();
             // We use MAX_VALUE as that's the default value for "no deletion time"
diff --git a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
index 0914a58..2c1faa0 100644
--- a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
+++ b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java
@@ -22,12 +22,13 @@ import java.util.Arrays;
 import java.util.concurrent.atomic.AtomicLongArray;
 
 import com.google.common.base.Objects;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.cassandra.db.TypeSizes;
 import org.apache.cassandra.io.ISerializer;
 import org.apache.cassandra.io.util.DataInputPlus;
 import org.apache.cassandra.io.util.DataOutputPlus;
-import org.slf4j.Logger;
 
 public class EstimatedHistogram
 {
@@ -262,11 +263,27 @@ public class EstimatedHistogram
     }
 
     /**
-     * @return true if this histogram has overflowed -- that is, a value larger than our largest bucket could bound was added
+     * @return true if a value larger than our largest bucket offset has been recorded, and false otherwise
      */
     public boolean isOverflowed()
     {
-        return buckets.get(buckets.length() - 1) > 0;
+        return overflowCount() > 0;
+    }
+
+    /**
+     * @return the number of recorded values larger than the largest bucket offset
+     */
+    public long overflowCount()
+    {
+        return buckets.get(buckets.length() - 1);
+    }
+
+    /**
+     * Resets the count in the overflow bucket to zero. Subsequent calls to {@link #isOverflowed()} will return false.
+     */
+    public void clearOverflow()
+    {
+        buckets.set(buckets.length() - 1, 0);
     }
 
     /**
@@ -354,8 +371,16 @@ public class EstimatedHistogram
 
     public static class EstimatedHistogramSerializer implements ISerializer<EstimatedHistogram>
     {
+        private static final Logger logger = LoggerFactory.getLogger(EstimatedHistogramSerializer.class);
+
         public void serialize(EstimatedHistogram eh, DataOutputPlus out) throws IOException
         {
+            if (eh.isOverflowed())
+            {
+                logger.warn("Serializing a histogram with {} values greater than the maximum of {}...",
+                            eh.overflowCount(), eh.getLargestBucketOffset());
+            }
+
             long[] offsets = eh.getBucketOffsets();
             long[] buckets = eh.getBuckets(false);
             out.writeInt(buckets.length);
diff --git a/test/unit/org/apache/cassandra/io/sstable/metadata/MetadataSerializerTest.java b/test/unit/org/apache/cassandra/io/sstable/metadata/MetadataSerializerTest.java
index 9df3e11..af91f4e 100644
--- a/test/unit/org/apache/cassandra/io/sstable/metadata/MetadataSerializerTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/metadata/MetadataSerializerTest.java
@@ -45,6 +45,8 @@ import org.apache.cassandra.io.util.DataOutputStreamPlus;
 import org.apache.cassandra.io.util.RandomAccessReader;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 public class MetadataSerializerTest
 {
@@ -74,6 +76,33 @@ public class MetadataSerializerTest
         }
     }
 
+    @Test
+    public void testHistogramSterilization() throws IOException
+    {
+        Map<MetadataType, MetadataComponent> originalMetadata = constructMetadata();
+
+        // Modify the histograms to overflow:
+        StatsMetadata originalStats = (StatsMetadata) originalMetadata.get(MetadataType.STATS);
+        originalStats.estimatedColumnCount.add(Long.MAX_VALUE);
+        originalStats.estimatedPartitionSize.add(Long.MAX_VALUE);
+        assertTrue(originalStats.estimatedColumnCount.isOverflowed());
+        assertTrue(originalStats.estimatedPartitionSize.isOverflowed());
+
+        // Serialize w/ overflowed histograms:
+        MetadataSerializer serializer = new MetadataSerializer();
+        File statsFile = serialize(originalMetadata, serializer, BigFormat.latestVersion);
+        Descriptor desc = new Descriptor(statsFile.getParentFile(), "", "", 0, SSTableFormat.Type.BIG);
+
+        try (RandomAccessReader in = RandomAccessReader.open(statsFile))
+        {
+            // Deserialie and verify that the two histograms have had their overflow buckets cleared:
+            Map<MetadataType, MetadataComponent> deserialized = serializer.deserialize(desc, in, EnumSet.allOf(MetadataType.class));
+            StatsMetadata deserializedStats = (StatsMetadata)deserialized.get(MetadataType.STATS);
+            assertFalse(deserializedStats.estimatedColumnCount.isOverflowed());
+            assertFalse(deserializedStats.estimatedPartitionSize.isOverflowed());
+        }
+    }
+
     public File serialize(Map<MetadataType, MetadataComponent> metadata, MetadataSerializer serializer, Version version)
             throws IOException
     {
diff --git a/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java b/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java
index b3fbfb6..edc297a 100644
--- a/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java
+++ b/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java
@@ -21,6 +21,8 @@ package org.apache.cassandra.utils;
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 
 public class EstimatedHistogramTest
@@ -167,4 +169,14 @@ public class EstimatedHistogramTest
             assertEquals(1, histogram.percentile(0.99));
         }
     }
+
+    @Test
+    public void testClearOverflow()
+    {
+        EstimatedHistogram histogram = new EstimatedHistogram(1);
+        histogram.add(100);
+        assertTrue(histogram.isOverflowed());
+        histogram.clearOverflow();
+        assertFalse(histogram.isOverflowed());
+    }
 }
diff --git a/test/unit/org/apache/cassandra/utils/SerializationsTest.java b/test/unit/org/apache/cassandra/utils/SerializationsTest.java
index ac5a6a7..5a8d7f9 100644
--- a/test/unit/org/apache/cassandra/utils/SerializationsTest.java
+++ b/test/unit/org/apache/cassandra/utils/SerializationsTest.java
@@ -210,7 +210,7 @@ public class SerializationsTest extends AbstractSerializationsTester
             offsets[i] = i;
             data[i] = 10 * i;
         }
-        data[offsets.length] = 100000;
+        data[offsets.length] = 100000; // write into the overflow bucket
         EstimatedHistogram hist2 = new EstimatedHistogram(offsets, data);
 
         try (DataOutputStreamPlus out = getOutput("utils.EstimatedHistogram.bin"))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org