You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jm...@apache.org on 2014/11/06 17:21:06 UTC

[1/2] git commit: Fix overflow on histogram computation

Repository: cassandra
Updated Branches:
  refs/heads/trunk d5b2fa206 -> b4cbb28f3


Fix overflow on histogram computation

Patch by cyeksigian; reviewed by jmckenzie for CASSANDRA-8028


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/9685622b
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/9685622b
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/9685622b

Branch: refs/heads/trunk
Commit: 9685622b32d19ffc9315e764a224f41cbe6b6d4a
Parents: 85ea373
Author: Carl Yeksigian <ca...@apache.org>
Authored: Thu Nov 6 10:18:09 2014 -0600
Committer: Joshua McKenzie <jm...@apache.org>
Committed: Thu Nov 6 10:18:09 2014 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cassandra/metrics/ColumnFamilyMetrics.java  | 67 ++++++++++++++++----
 .../org/apache/cassandra/tools/NodeTool.java    | 32 ++++++++--
 3 files changed, 80 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/9685622b/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 5348f2f..89db48e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.3
+ * Fix overflow on histogram computation (CASSANDRA-8028)
  * Have paxos reuse the timestamp generation of normal queries (CASSANDRA-7801)
 
 2.1.2

http://git-wip-us.apache.org/repos/asf/cassandra/blob/9685622b/src/java/org/apache/cassandra/metrics/ColumnFamilyMetrics.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/metrics/ColumnFamilyMetrics.java b/src/java/org/apache/cassandra/metrics/ColumnFamilyMetrics.java
index d9d3ed9..2bdaf27 100644
--- a/src/java/org/apache/cassandra/metrics/ColumnFamilyMetrics.java
+++ b/src/java/org/apache/cassandra/metrics/ColumnFamilyMetrics.java
@@ -18,6 +18,7 @@
 package org.apache.cassandra.metrics;
 
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Set;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.TimeUnit;
@@ -146,6 +147,46 @@ public class ColumnFamilyMetrics
      * Stores all metric names created that can be used when unregistering
      */
     public final static Set<String> all = Sets.newHashSet();
+
+    private interface GetHistogram
+    {
+        public EstimatedHistogram getHistogram(SSTableReader reader);
+    }
+
+    private static long[] combineHistograms(Iterable<SSTableReader> sstables, GetHistogram getHistogram)
+    {
+        Iterator<SSTableReader> iterator = sstables.iterator();
+        if (!iterator.hasNext())
+        {
+            return new long[0];
+        }
+        long[] firstBucket = getHistogram.getHistogram(iterator.next()).getBuckets(false);
+        long[] values = new long[firstBucket.length];
+        System.arraycopy(firstBucket, 0, values, 0, values.length);
+
+        while (iterator.hasNext())
+        {
+            long[] nextBucket = getHistogram.getHistogram(iterator.next()).getBuckets(false);
+            if (nextBucket.length > values.length)
+            {
+                long[] newValues = new long[nextBucket.length];
+                System.arraycopy(firstBucket, 0, newValues, 0, firstBucket.length);
+                for (int i = 0; i < newValues.length; i++)
+                {
+                    newValues[i] += nextBucket[i];
+                }
+                values = newValues;
+            }
+            else
+            {
+                for (int i = 0; i < values.length; i++)
+                {
+                    values[i] += nextBucket[i];
+                }
+            }
+        }
+        return values;
+    }
     
     /**
      * Creates metrics for given {@link ColumnFamilyStore}.
@@ -219,28 +260,26 @@ public class ColumnFamilyMetrics
         {
             public long[] value()
             {
-                long[] histogram = new long[90];
-                for (SSTableReader sstable : cfs.getSSTables())
+                return combineHistograms(cfs.getSSTables(), new GetHistogram()
                 {
-                    long[] rowSize = sstable.getEstimatedRowSize().getBuckets(false);
-                    for (int i = 0; i < histogram.length; i++)
-                        histogram[i] += rowSize[i];
-                }
-                return histogram;
+                    public EstimatedHistogram getHistogram(SSTableReader reader)
+                    {
+                        return reader.getEstimatedRowSize();
+                    }
+                });
             }
         });
         estimatedColumnCountHistogram = Metrics.newGauge(factory.createMetricName("EstimatedColumnCountHistogram"), new Gauge<long[]>()
         {
             public long[] value()
             {
-                long[] histogram = new long[90];
-                for (SSTableReader sstable : cfs.getSSTables())
+                return combineHistograms(cfs.getSSTables(), new GetHistogram()
                 {
-                    long[] columnSize = sstable.getEstimatedColumnCount().getBuckets(false);
-                    for (int i = 0; i < histogram.length; i++)
-                        histogram[i] += columnSize[i];
-                }
-                return histogram;
+                    public EstimatedHistogram getHistogram(SSTableReader reader)
+                    {
+                        return reader.getEstimatedColumnCount();
+                    }
+                });
             }
         });
         sstablesPerReadHistogram = createColumnFamilyHistogram("SSTablesPerReadHistogram", cfs.keyspace.metric.sstablesPerReadHistogram);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/9685622b/src/java/org/apache/cassandra/tools/NodeTool.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/tools/NodeTool.java b/src/java/org/apache/cassandra/tools/NodeTool.java
index 39bc4fd..c09751b 100644
--- a/src/java/org/apache/cassandra/tools/NodeTool.java
+++ b/src/java/org/apache/cassandra/tools/NodeTool.java
@@ -895,18 +895,38 @@ public class NodeTool
             long[] estimatedRowSize = (long[]) probe.getColumnFamilyMetric(keyspace, cfname, "EstimatedRowSizeHistogram");
             long[] estimatedColumnCount = (long[]) probe.getColumnFamilyMetric(keyspace, cfname, "EstimatedColumnCountHistogram");
 
-            long[] bucketOffsets = new EstimatedHistogram().getBucketOffsets();
-            EstimatedHistogram rowSizeHist = new EstimatedHistogram(bucketOffsets, estimatedRowSize);
-            EstimatedHistogram columnCountHist = new EstimatedHistogram(bucketOffsets, estimatedColumnCount);
+            long[] rowSizeBucketOffsets = new EstimatedHistogram(estimatedRowSize.length).getBucketOffsets();
+            long[] columnCountBucketOffsets = new EstimatedHistogram(estimatedColumnCount.length).getBucketOffsets();
+            EstimatedHistogram rowSizeHist = new EstimatedHistogram(rowSizeBucketOffsets, estimatedRowSize);
+            EstimatedHistogram columnCountHist = new EstimatedHistogram(columnCountBucketOffsets, estimatedColumnCount);
 
             // build arrays to store percentile values
             double[] estimatedRowSizePercentiles = new double[7];
             double[] estimatedColumnCountPercentiles = new double[7];
             double[] offsetPercentiles = new double[]{0.5, 0.75, 0.95, 0.98, 0.99};
-            for (int i = 0; i < offsetPercentiles.length; i++)
+
+            if (rowSizeHist.isOverflowed())
+            {
+                System.err.println(String.format("Row sizes are larger than %s, unable to calculate percentiles", rowSizeBucketOffsets[rowSizeBucketOffsets.length - 1]));
+                for (int i = 0; i < offsetPercentiles.length; i++)
+                        estimatedRowSizePercentiles[i] = Double.NaN;
+            }
+            else
+            {
+                for (int i = 0; i < offsetPercentiles.length; i++)
+                    estimatedRowSizePercentiles[i] = rowSizeHist.percentile(offsetPercentiles[i]);
+            }
+
+            if (columnCountHist.isOverflowed())
+            {
+                System.err.println(String.format("Column counts are larger than %s, unable to calculate percentiles", columnCountBucketOffsets[columnCountBucketOffsets.length - 1]));
+                for (int i = 0; i < estimatedColumnCountPercentiles.length; i++)
+                    estimatedColumnCountPercentiles[i] = Double.NaN;
+            }
+            else
             {
-                estimatedRowSizePercentiles[i] = rowSizeHist.percentile(offsetPercentiles[i]);
-                estimatedColumnCountPercentiles[i] = columnCountHist.percentile(offsetPercentiles[i]);
+                for (int i = 0; i < offsetPercentiles.length; i++)
+                    estimatedColumnCountPercentiles[i] = columnCountHist.percentile(offsetPercentiles[i]);
             }
 
             // min value


[2/2] git commit: Merge branch 'cassandra-2.1' into trunk

Posted by jm...@apache.org.
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/b4cbb28f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/b4cbb28f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/b4cbb28f

Branch: refs/heads/trunk
Commit: b4cbb28f3759fc92ab2a43432fcbce33bac60d7a
Parents: d5b2fa2 9685622
Author: Joshua McKenzie <jm...@apache.org>
Authored: Thu Nov 6 10:19:03 2014 -0600
Committer: Joshua McKenzie <jm...@apache.org>
Committed: Thu Nov 6 10:19:03 2014 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cassandra/metrics/ColumnFamilyMetrics.java  | 67 ++++++++++++++++----
 .../org/apache/cassandra/tools/NodeTool.java    | 32 ++++++++--
 3 files changed, 80 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/b4cbb28f/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 79318fc,89db48e..92b2b2c
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,36 -1,5 +1,37 @@@
 +3.0
 + * Mark sstables as repaired after full repair (CASSANDRA-7586) 
 + * Extend Descriptor to include a format value and refactor reader/writer apis (CASSANDRA-7443)
 + * Integrate JMH for microbenchmarks (CASSANDRA-8151)
 + * Keep sstable levels when bootstrapping (CASSANDRA-7460)
 + * Add Sigar library and perform basic OS settings check on startup (CASSANDRA-7838)
 + * Support for aggregation functions (CASSANDRA-4914)
 + * Remove cassandra-cli (CASSANDRA-7920)
 + * Accept dollar quoted strings in CQL (CASSANDRA-7769)
 + * Make assassinate a first class command (CASSANDRA-7935)
 + * Support IN clause on any clustering column (CASSANDRA-4762)
 + * Improve compaction logging (CASSANDRA-7818)
 + * Remove YamlFileNetworkTopologySnitch (CASSANDRA-7917)
 + * Do anticompaction in groups (CASSANDRA-6851)
 + * Support pure user-defined functions (CASSANDRA-7395, 7526, 7562, 7740, 7781, 7929,
 +   7924, 7812, 8063)
 + * Permit configurable timestamps with cassandra-stress (CASSANDRA-7416)
 + * Move sstable RandomAccessReader to nio2, which allows using the
 +   FILE_SHARE_DELETE flag on Windows (CASSANDRA-4050)
 + * Remove CQL2 (CASSANDRA-5918)
 + * Add Thrift get_multi_slice call (CASSANDRA-6757)
 + * Optimize fetching multiple cells by name (CASSANDRA-6933)
 + * Allow compilation in java 8 (CASSANDRA-7028)
 + * Make incremental repair default (CASSANDRA-7250)
 + * Enable code coverage thru JaCoCo (CASSANDRA-7226)
 + * Switch external naming of 'column families' to 'tables' (CASSANDRA-4369) 
 + * Shorten SSTable path (CASSANDRA-6962)
 + * Use unsafe mutations for most unit tests (CASSANDRA-6969)
 + * Fix race condition during calculation of pending ranges (CASSANDRA-7390)
 + * Fail on very large batch sizes (CASSANDRA-8011)
 + * improve concurrency of repair (CASSANDRA-6455)
 +
  2.1.3
+  * Fix overflow on histogram computation (CASSANDRA-8028)
   * Have paxos reuse the timestamp generation of normal queries (CASSANDRA-7801)
  
  2.1.2

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b4cbb28f/src/java/org/apache/cassandra/metrics/ColumnFamilyMetrics.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b4cbb28f/src/java/org/apache/cassandra/tools/NodeTool.java
----------------------------------------------------------------------