You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by bd...@apache.org on 2017/08/18 21:19:44 UTC

cassandra git commit: Add bytes repaired/unrepaired to nodetool tablestats

Repository: cassandra
Updated Branches:
  refs/heads/trunk e1a1b80d4 -> b740efa73


Add bytes repaired/unrepaired to nodetool tablestats

Patch by Blake Eggleston; reviewed by Chris Lohfink for CASSANDRA-13774


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/b740efa7
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/b740efa7
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/b740efa7

Branch: refs/heads/trunk
Commit: b740efa731ff2b684491ba4ef0cac2b000f96149
Parents: e1a1b80
Author: Blake Eggleston <bd...@gmail.com>
Authored: Thu Aug 17 10:43:30 2017 -0700
Committer: Blake Eggleston <bd...@gmail.com>
Committed: Fri Aug 18 14:17:36 2017 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 doc/source/operating/metrics.rst                |   3 +
 .../apache/cassandra/metrics/TableMetrics.java  | 123 ++++++++++++++++---
 .../org/apache/cassandra/tools/NodeProbe.java   |   3 +
 .../tools/nodetool/stats/StatsTable.java        |   3 +
 .../tools/nodetool/stats/TableStatsHolder.java  |  16 +++
 .../tools/nodetool/stats/TableStatsPrinter.java |   6 +
 7 files changed, 135 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/b740efa7/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index a02a7bf..1f67f3b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 4.0
+ * Add bytes repaired/unrepaired to nodetool tablestats (CASSANDRA-13774)
  * Don't delete incremental repair sessions if they still have sstables (CASSANDRA-13758)
  * Fix pending repair manager index out of bounds check (CASSANDRA-13769)
  * Don't use RangeFetchMapCalculator when RF=1 (CASSANDRA-13576)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b740efa7/doc/source/operating/metrics.rst
----------------------------------------------------------------------
diff --git a/doc/source/operating/metrics.rst b/doc/source/operating/metrics.rst
index cfdd584..10afc1b 100644
--- a/doc/source/operating/metrics.rst
+++ b/doc/source/operating/metrics.rst
@@ -126,6 +126,9 @@ CasPrepare                              Latency        Latency of paxos prepare
 CasPropose                              Latency        Latency of paxos propose round.
 CasCommit                               Latency        Latency of paxos commit round.
 PercentRepaired                         Gauge<Double>  Percent of table data that is repaired on disk.
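+BytesRepaired                           Gauge<Long>    Uncompressed size, in bytes, of this table's repaired sstables.
+BytesUnrepaired                         Gauge<Long>    Uncompressed size, in bytes, of this table's sstables that are neither repaired nor pending repair.
+BytesPendingRepair                      Gauge<Long>    Uncompressed size, in bytes, of this table's sstables isolated for an ongoing incremental repair.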
 SpeculativeRetries                      Counter        Number of times speculative retries were sent for this table.
 SpeculativeFailedRetries                Counter        Number of speculative retries that failed to prevent a timeout
 SpeculativeInsufficientReplicas         Counter        Number of speculative retries that couldn't be attempted due to lack of replicas

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b740efa7/src/java/org/apache/cassandra/metrics/TableMetrics.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java
index 98fd1e9..58b017e 100644
--- a/src/java/org/apache/cassandra/metrics/TableMetrics.java
+++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java
@@ -22,7 +22,9 @@ import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Predicate;
 
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Maps;
 
 import com.codahale.metrics.*;
@@ -38,6 +40,7 @@ import org.apache.cassandra.io.compress.CompressionMetadata;
 import org.apache.cassandra.io.sstable.format.SSTableReader;
 import org.apache.cassandra.io.sstable.metadata.MetadataCollector;
 import org.apache.cassandra.utils.EstimatedHistogram;
+import org.apache.cassandra.utils.Pair;
 import org.apache.cassandra.utils.TopKSampler;
 
 import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics;
@@ -146,6 +149,10 @@ public class TableMetrics
     public final LatencyMetrics casCommit;
     /** percent of the data that is repaired */
     public final Gauge<Double> percentRepaired;
+    /** Reports the size of sstables in repaired, unrepaired, and any ongoing repair buckets */
+    public final Gauge<Long> bytesRepaired;
+    public final Gauge<Long> bytesUnrepaired;
+    public final Gauge<Long> bytesPendingRepair;
     /** Number of started repairs as coordinator on this table */
     public final Counter repairsStarted;
     /** Number of completed repairs as coordinator on this table */
@@ -190,40 +197,76 @@ public class TableMetrics
     public final static LatencyMetrics globalWriteLatency = new LatencyMetrics(globalFactory, globalAliasFactory, "Write");
     public final static LatencyMetrics globalRangeLatency = new LatencyMetrics(globalFactory, globalAliasFactory, "Range");
 
-    public final static Gauge<Double> globalPercentRepaired = Metrics.register(globalFactory.createMetricName("PercentRepaired"),
-            new Gauge<Double>()
+    private static Pair<Long, Long> totalNonSystemTablesSize(Predicate<SSTableReader> predicate)
     {
-        public Double getValue()
+        long total = 0;
+        long filtered = 0;
+        for (String keyspace : Schema.instance.getNonSystemKeyspaces())
         {
-            double repaired = 0;
-            double total = 0;
-            for (String keyspace : Schema.instance.getNonSystemKeyspaces())
-            {
-                Keyspace k = Schema.instance.getKeyspaceInstance(keyspace);
-                if (SchemaConstants.DISTRIBUTED_KEYSPACE_NAME.equals(k.getName()))
-                    continue;
-                if (k.getReplicationStrategy().getReplicationFactor() < 2)
-                    continue;
 
-                for (ColumnFamilyStore cf : k.getColumnFamilyStores())
+            Keyspace k = Schema.instance.getKeyspaceInstance(keyspace);
+            if (SchemaConstants.DISTRIBUTED_KEYSPACE_NAME.equals(k.getName()))
+                continue;
+            if (k.getReplicationStrategy().getReplicationFactor() < 2)
+                continue;
+
+            for (ColumnFamilyStore cf : k.getColumnFamilyStores())
+            {
+                if (!SecondaryIndexManager.isIndexColumnFamily(cf.name))
                 {
-                    if (!SecondaryIndexManager.isIndexColumnFamily(cf.name))
+                    for (SSTableReader sstable : cf.getSSTables(SSTableSet.CANONICAL))
                     {
-                        for (SSTableReader sstable : cf.getSSTables(SSTableSet.CANONICAL))
+                        if (predicate.test(sstable))
                         {
-                            if (sstable.isRepaired())
-                            {
-                                repaired += sstable.uncompressedLength();
-                            }
-                            total += sstable.uncompressedLength();
+                            filtered += sstable.uncompressedLength();
                         }
+                        total += sstable.uncompressedLength();
                     }
                 }
             }
+        }
+        return Pair.create(filtered, total);
+    }
+
+    public static final Gauge<Double> globalPercentRepaired = Metrics.register(globalFactory.createMetricName("PercentRepaired"),
+                                                                               new Gauge<Double>()
+    {
+        public Double getValue()
+        {
+            Pair<Long, Long> result = totalNonSystemTablesSize(SSTableReader::isRepaired);
+            double repaired = result.left;
+            double total = result.right;
             return total > 0 ? (repaired / total) * 100 : 100.0;
         }
     });
 
+    public static final Gauge<Long> globalBytesRepaired = Metrics.register(globalFactory.createMetricName("BytesRepaired"),
+                                                                           new Gauge<Long>()
+    {
+        public Long getValue()
+        {
+            return totalNonSystemTablesSize(SSTableReader::isRepaired).left;
+        }
+    });
+
+    public static final Gauge<Long> globalBytesUnrepaired = Metrics.register(globalFactory.createMetricName("BytesUnrepaired"),
+                                                                             new Gauge<Long>()
+    {
+        public Long getValue()
+        {
+            return totalNonSystemTablesSize(s -> !s.isRepaired() && !s.isPendingRepair()).left;
+        }
+    });
+
+    public static final Gauge<Long> globalBytesPendingRepair = Metrics.register(globalFactory.createMetricName("BytesPendingRepair"),
+                                                                                new Gauge<Long>()
+    {
+        public Long getValue()
+        {
+            return totalNonSystemTablesSize(SSTableReader::isPendingRepair).left;
+        }
+    });
+
     public final Map<Sampler, TopKSampler<ByteBuffer>> samplers;
     /**
      * stores metrics that will be rolled into a single global metric
@@ -425,6 +468,46 @@ public class TableMetrics
                 return total > 0 ? (repaired / total) * 100 : 100.0;
             }
         });
+
+        bytesRepaired = createTableGauge("BytesRepaired", new Gauge<Long>()
+        {
+            public Long getValue()
+            {
+                long size = 0;
+                for (SSTableReader sstable: Iterables.filter(cfs.getSSTables(SSTableSet.CANONICAL), SSTableReader::isRepaired))
+                {
+                    size += sstable.uncompressedLength();
+                }
+                return size;
+            }
+        });
+
+        bytesUnrepaired = createTableGauge("BytesUnrepaired", new Gauge<Long>()
+        {
+            public Long getValue()
+            {
+                long size = 0;
+                for (SSTableReader sstable: Iterables.filter(cfs.getSSTables(SSTableSet.CANONICAL), s -> !s.isRepaired() && !s.isPendingRepair()))
+                {
+                    size += sstable.uncompressedLength();
+                }
+                return size;
+            }
+        });
+
+        bytesPendingRepair = createTableGauge("BytesPendingRepair", new Gauge<Long>()
+        {
+            public Long getValue()
+            {
+                long size = 0;
+                for (SSTableReader sstable: Iterables.filter(cfs.getSSTables(SSTableSet.CANONICAL), SSTableReader::isPendingRepair))
+                {
+                    size += sstable.uncompressedLength();
+                }
+                return size;
+            }
+        });
+
         readLatency = new LatencyMetrics(factory, "Read", cfs.keyspace.metric.readLatency, globalReadLatency);
         writeLatency = new LatencyMetrics(factory, "Write", cfs.keyspace.metric.writeLatency, globalWriteLatency);
         rangeLatency = new LatencyMetrics(factory, "Range", cfs.keyspace.metric.rangeLatency, globalRangeLatency);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b740efa7/src/java/org/apache/cassandra/tools/NodeProbe.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/tools/NodeProbe.java b/src/java/org/apache/cassandra/tools/NodeProbe.java
index 665ea6f..109a51e 100644
--- a/src/java/org/apache/cassandra/tools/NodeProbe.java
+++ b/src/java/org/apache/cassandra/tools/NodeProbe.java
@@ -1339,6 +1339,9 @@ public class NodeProbe implements AutoCloseable
                 case "MemtableOffHeapSize":
                 case "MinPartitionSize":
                 case "PercentRepaired":
+                case "BytesRepaired":
+                case "BytesUnrepaired":
+                case "BytesPendingRepair":
                 case "RecentBloomFilterFalsePositives":
                 case "RecentBloomFilterFalseRatio":
                 case "SnapshotsSize":

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b740efa7/src/java/org/apache/cassandra/tools/nodetool/stats/StatsTable.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/tools/nodetool/stats/StatsTable.java b/src/java/org/apache/cassandra/tools/nodetool/stats/StatsTable.java
index 87bc527..642e05f 100644
--- a/src/java/org/apache/cassandra/tools/nodetool/stats/StatsTable.java
+++ b/src/java/org/apache/cassandra/tools/nodetool/stats/StatsTable.java
@@ -57,6 +57,9 @@ public class StatsTable
     public long compactedPartitionMaximumBytes;
     public long compactedPartitionMeanBytes;
     public double percentRepaired;
+    public long bytesRepaired;
+    public long bytesUnrepaired;
+    public long bytesPendingRepair;
     public double averageLiveCellsPerSliceLastFiveMinutes;
     public long maximumLiveCellsPerSliceLastFiveMinutes;
     public double averageTombstonesPerSliceLastFiveMinutes;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b740efa7/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsHolder.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsHolder.java b/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsHolder.java
index 19ab53c..929619a 100644
--- a/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsHolder.java
+++ b/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsHolder.java
@@ -83,6 +83,11 @@ public class TableStatsHolder implements StatsHolder
                 mpTable.put("local_write_latency_ms", String.format("%01.3f", table.localWriteLatencyMs));
                 mpTable.put("pending_flushes", table.pendingFlushes);
                 mpTable.put("percent_repaired", table.percentRepaired);
+
+                mpTable.put("bytes_repaired", table.bytesRepaired);
+                mpTable.put("bytes_unrepaired", table.bytesUnrepaired);
+                mpTable.put("bytes_pending_repair", table.bytesPendingRepair);
+
                 mpTable.put("bloom_filter_false_positives", table.bloomFilterFalsePositives);
                 mpTable.put("bloom_filter_false_ratio", String.format("%01.5f", table.bloomFilterFalseRatio));
                 mpTable.put("bloom_filter_space_used", table.bloomFilterSpaceUsed);
@@ -185,6 +190,9 @@ public class TableStatsHolder implements StatsHolder
                 Long compressionMetadataOffHeapSize = null;
                 Long offHeapSize = null;
                 Double percentRepaired = null;
+                Long bytesRepaired = null;
+                Long bytesUnrepaired = null;
+                Long bytesPendingRepair = null;
 
                 try
                 {
@@ -194,6 +202,9 @@ public class TableStatsHolder implements StatsHolder
                     compressionMetadataOffHeapSize = (Long) probe.getColumnFamilyMetric(keyspaceName, tableName, "CompressionMetadataOffHeapMemoryUsed");
                     offHeapSize = memtableOffHeapSize + bloomFilterOffHeapSize + indexSummaryOffHeapSize + compressionMetadataOffHeapSize;
                     percentRepaired = (Double) probe.getColumnFamilyMetric(keyspaceName, tableName, "PercentRepaired");
+                    bytesRepaired = (Long) probe.getColumnFamilyMetric(keyspaceName, tableName, "BytesRepaired");
+                    bytesUnrepaired = (Long) probe.getColumnFamilyMetric(keyspaceName, tableName, "BytesUnrepaired");
+                    bytesPendingRepair = (Long) probe.getColumnFamilyMetric(keyspaceName, tableName, "BytesPendingRepair");
                 }
                 catch (RuntimeException e)
                 {
@@ -215,6 +226,11 @@ public class TableStatsHolder implements StatsHolder
                 {
                     statsTable.percentRepaired = Math.round(100 * percentRepaired) / 100.0;
                 }
+
+                statsTable.bytesRepaired = bytesRepaired != null ? bytesRepaired : 0;
+                statsTable.bytesUnrepaired = bytesUnrepaired != null ? bytesUnrepaired : 0;
+                statsTable.bytesPendingRepair = bytesPendingRepair != null ? bytesPendingRepair : 0;
+
                 statsTable.sstableCompressionRatio = probe.getColumnFamilyMetric(keyspaceName, tableName, "CompressionRatio");
                 Object estimatedPartitionCount = probe.getColumnFamilyMetric(keyspaceName, tableName, "EstimatedPartitionCount");
                 if (Long.valueOf(-1L).equals(estimatedPartitionCount))

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b740efa7/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsPrinter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsPrinter.java b/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsPrinter.java
index 7c76e44..fb959b2 100644
--- a/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsPrinter.java
+++ b/src/java/org/apache/cassandra/tools/nodetool/stats/TableStatsPrinter.java
@@ -21,6 +21,8 @@ package org.apache.cassandra.tools.nodetool.stats;
 import java.io.PrintStream;
 import java.util.List;
 
+import org.apache.cassandra.utils.FBUtilities;
+
 public class TableStatsPrinter
 {
     public static StatsPrinter from(String format)
@@ -86,6 +88,10 @@ public class TableStatsPrinter
                     out.println("\t\tPending flushes: " + table.pendingFlushes);
                     out.println("\t\tPercent repaired: " + table.percentRepaired);
 
+                    out.println("\t\tBytes repaired: " + FBUtilities.prettyPrintMemory(table.bytesRepaired));
+                    out.println("\t\tBytes unrepaired: " + FBUtilities.prettyPrintMemory(table.bytesUnrepaired));
+                    out.println("\t\tBytes pending repair: " + FBUtilities.prettyPrintMemory(table.bytesPendingRepair));
+
                     out.println("\t\tBloom filter false positives: " + table.bloomFilterFalsePositives);
                     out.printf("\t\tBloom filter false ratio: %01.5f%n", table.bloomFilterFalseRatio);
                     out.println("\t\tBloom filter space used: " + table.bloomFilterSpaceUsed);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org