You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by bd...@apache.org on 2019/11/11 23:33:36 UTC
[cassandra] branch cassandra-3.0 updated: Minimize clustering
values in metadata collector
This is an automated email from the ASF dual-hosted git repository.
bdeggleston pushed a commit to branch cassandra-3.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-3.0 by this push:
new 9382186 Minimize clustering values in metadata collector
9382186 is described below
commit 9382186f70cf0560c5308dc324411bef52cf461f
Author: Blake Eggleston <bd...@gmail.com>
AuthorDate: Thu Nov 7 10:48:51 2019 -0800
Minimize clustering values in metadata collector
Patch by Blake Eggleston; Reviewed by Marcus Eriksson for CASSANDRA-15400
---
CHANGES.txt | 1 +
src/java/org/apache/cassandra/db/Clustering.java | 8 +++++
.../org/apache/cassandra/db/ClusteringPrefix.java | 6 ++++
.../org/apache/cassandra/db/RangeTombstone.java | 8 +++++
src/java/org/apache/cassandra/db/Slice.java | 8 +++++
.../io/sstable/metadata/MetadataCollector.java | 35 ++--------------------
.../org/apache/cassandra/utils/ByteBufferUtil.java | 23 ++++++++++++++
.../cassandra/io/sstable/SSTableMetadataTest.java | 6 ++++
8 files changed, 62 insertions(+), 33 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 547bb1a..6dd6d0a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.0.20
+ * Minimize clustering values in metadata collector (CASSANDRA-15400)
* Avoid over-trimming of results in mixed mode clusters (CASSANDRA-15405)
* validate value sizes in LegacyLayout (CASSANDRA-15373)
* Ensure that tracing doesn't break connections in 3.x/4.0 mixed mode by default (CASSANDRA-15385)
diff --git a/src/java/org/apache/cassandra/db/Clustering.java b/src/java/org/apache/cassandra/db/Clustering.java
index a40cc1f..62af0f1 100644
--- a/src/java/org/apache/cassandra/db/Clustering.java
+++ b/src/java/org/apache/cassandra/db/Clustering.java
@@ -25,6 +25,7 @@ import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.io.util.*;
+import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.memory.AbstractAllocator;
/**
@@ -88,6 +89,13 @@ public class Clustering extends AbstractClusteringPrefix
return Kind.CLUSTERING;
}
+ public ClusteringPrefix minimize()
+ {
+ if (!ByteBufferUtil.canMinimize(values))
+ return this;
+ return new Clustering(ByteBufferUtil.minimizeBuffers(values));
+ }
+
public Clustering copy(AbstractAllocator allocator)
{
// Important for STATIC_CLUSTERING (but no point in being wasteful in general).
diff --git a/src/java/org/apache/cassandra/db/ClusteringPrefix.java b/src/java/org/apache/cassandra/db/ClusteringPrefix.java
index 3b826c9..c1000e4 100644
--- a/src/java/org/apache/cassandra/db/ClusteringPrefix.java
+++ b/src/java/org/apache/cassandra/db/ClusteringPrefix.java
@@ -246,6 +246,12 @@ public interface ClusteringPrefix extends IMeasurableMemory, Clusterable
*/
public ByteBuffer[] getRawValues();
+ /**
+ * If the prefix contains byte buffers that can be minimized (see {@link ByteBufferUtil#minimalBufferFor(ByteBuffer)}),
+ * this will return a copy of the prefix with minimized values, otherwise it returns itself.
+ */
+ public ClusteringPrefix minimize();
+
public static class Serializer
{
public void serialize(ClusteringPrefix clustering, DataOutputPlus out, int version, List<AbstractType<?>> types) throws IOException
diff --git a/src/java/org/apache/cassandra/db/RangeTombstone.java b/src/java/org/apache/cassandra/db/RangeTombstone.java
index 8af3b97..4a26581 100644
--- a/src/java/org/apache/cassandra/db/RangeTombstone.java
+++ b/src/java/org/apache/cassandra/db/RangeTombstone.java
@@ -26,6 +26,7 @@ import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.rows.RangeTombstoneMarker;
import org.apache.cassandra.io.util.DataInputPlus;
import org.apache.cassandra.io.util.DataOutputPlus;
+import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.memory.AbstractAllocator;
@@ -174,6 +175,13 @@ public class RangeTombstone
return new Bound(kind(), newValues);
}
+ public ClusteringPrefix minimize()
+ {
+ if (!ByteBufferUtil.canMinimize(values))
+ return this;
+ return new Bound(kind, ByteBufferUtil.minimizeBuffers(values));
+ }
+
@Override
public Bound withNewKind(Kind kind)
{
diff --git a/src/java/org/apache/cassandra/db/Slice.java b/src/java/org/apache/cassandra/db/Slice.java
index f90c195..fb75b8e 100644
--- a/src/java/org/apache/cassandra/db/Slice.java
+++ b/src/java/org/apache/cassandra/db/Slice.java
@@ -25,6 +25,7 @@ import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.io.util.DataInputPlus;
import org.apache.cassandra.io.util.DataOutputPlus;
+import org.apache.cassandra.utils.ByteBufferUtil;
/**
* A slice represents the selection of a range of rows.
@@ -494,6 +495,13 @@ public class Slice
return sb.append(')').toString();
}
+ public ClusteringPrefix minimize()
+ {
+ if (!ByteBufferUtil.canMinimize(values))
+ return this;
+ return new Bound(kind, ByteBufferUtil.minimizeBuffers(values));
+ }
+
/**
* Serializer for slice bounds.
* <p>
diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
index 437d80f..867e9a1 100644
--- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
+++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java
@@ -229,42 +229,11 @@ public class MetadataCollector implements PartitionStatisticsCollector
public MetadataCollector updateClusteringValues(ClusteringPrefix clustering)
{
- minClustering = minClustering == null || comparator.compare(clustering, minClustering) < 0 ? clustering : minClustering;
- maxClustering = maxClustering == null || comparator.compare(clustering, maxClustering) > 0 ? clustering : maxClustering;
+ minClustering = minClustering == null || comparator.compare(clustering, minClustering) < 0 ? clustering.minimize() : minClustering;
+ maxClustering = maxClustering == null || comparator.compare(clustering, maxClustering) > 0 ? clustering.minimize() : maxClustering;
return this;
}
- private static ByteBuffer maybeMinimize(ByteBuffer buffer)
- {
- // ByteBuffer.minimalBufferFor doesn't handle null, but we can get it in this case since it's possible
- // for some clustering values to be null
- return buffer == null ? null : ByteBufferUtil.minimalBufferFor(buffer);
- }
-
- private static ByteBuffer min(ByteBuffer b1, ByteBuffer b2, AbstractType<?> comparator)
- {
- if (b1 == null)
- return b2;
- if (b2 == null)
- return b1;
-
- if (comparator.compare(b1, b2) >= 0)
- return b2;
- return b1;
- }
-
- private static ByteBuffer max(ByteBuffer b1, ByteBuffer b2, AbstractType<?> comparator)
- {
- if (b1 == null)
- return b2;
- if (b2 == null)
- return b1;
-
- if (comparator.compare(b1, b2) >= 0)
- return b1;
- return b2;
- }
-
public void updateHasLegacyCounterShards(boolean hasLegacyCounterShards)
{
this.hasLegacyCounterShards = this.hasLegacyCounterShards || hasLegacyCounterShards;
diff --git a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java
index 0e69f7e..e5a3bb8 100644
--- a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java
+++ b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java
@@ -610,12 +610,35 @@ public class ByteBufferUtil
return prefix.equals(value.duplicate().limit(value.remaining() - diff));
}
+ public static boolean canMinimize(ByteBuffer buf)
+ {
+ return buf != null && (buf.capacity() > buf.remaining() || !buf.hasArray());
+ }
+
/** trims size of bytebuffer to exactly number of bytes in it, to do not hold too much memory */
public static ByteBuffer minimalBufferFor(ByteBuffer buf)
{
return buf.capacity() > buf.remaining() || !buf.hasArray() ? ByteBuffer.wrap(getArray(buf)) : buf;
}
+ public static ByteBuffer[] minimizeBuffers(ByteBuffer[] src)
+ {
+ ByteBuffer[] dst = new ByteBuffer[src.length];
+ for (int i=0; i<src.length; i++)
+ dst[i] = src[i] != null ? minimalBufferFor(src[i]) : null;
+ return dst;
+ }
+
+ public static boolean canMinimize(ByteBuffer[] src)
+ {
+ for (ByteBuffer buffer : src)
+ {
+ if (canMinimize(buffer))
+ return true;
+ }
+ return false;
+ }
+
// Doesn't change bb position
public static int getShortLength(ByteBuffer bb, int position)
{
diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java b/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java
index 419eb25..f39cf3b 100644
--- a/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java
@@ -221,6 +221,9 @@ public class SSTableMetadataTest
{
assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().minClusteringValues.get(0)), "0col100");
assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().maxClusteringValues.get(0)), "7col149");
+ // make sure the clustering values are minimised
+ assertTrue(sstable.getSSTableMetadata().minClusteringValues.get(0).capacity() < 50);
+ assertTrue(sstable.getSSTableMetadata().maxClusteringValues.get(0).capacity() < 50);
}
String key = "row2";
@@ -240,6 +243,9 @@ public class SSTableMetadataTest
{
assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().minClusteringValues.get(0)), "0col100");
assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().maxClusteringValues.get(0)), "9col298");
+ // and make sure the clustering values are still minimised after compaction
+ assertTrue(sstable.getSSTableMetadata().minClusteringValues.get(0).capacity() < 50);
+ assertTrue(sstable.getSSTableMetadata().maxClusteringValues.get(0).capacity() < 50);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org