You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by xe...@apache.org on 2011/11/10 11:07:26 UTC

svn commit: r1200251 - in /cassandra/branches/cassandra-1.0: ./ src/java/org/apache/cassandra/db/ src/java/org/apache/cassandra/io/compress/ src/java/org/apache/cassandra/io/sstable/ test/unit/org/apache/cassandra/io/compress/ test/unit/org/apache/cass...

Author: xedin
Date: Thu Nov 10 10:07:25 2011
New Revision: 1200251

URL: http://svn.apache.org/viewvc?rev=1200251&view=rev
Log:
report compression ratio in CFSMBean
patch by Vijay; reviewed by Pavel Yaskevich for CASSANDRA-3393

Modified:
    cassandra/branches/cassandra-1.0/CHANGES.txt
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStoreMBean.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/DataTracker.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressedSequentialWriter.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/Descriptor.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableMetadata.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
    cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
    cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/compress/CompressedRandomAccessReaderTest.java
    cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataSerializerTest.java

Modified: cassandra/branches/cassandra-1.0/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/CHANGES.txt?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/CHANGES.txt (original)
+++ cassandra/branches/cassandra-1.0/CHANGES.txt Thu Nov 10 10:07:25 2011
@@ -7,6 +7,7 @@
  * automatically compute sha1 sum for uncompressed data files (CASSANDRA-3456)
  * fix reading metadata/statistics component for version < h (CASSANDRA-3474)
  * add sstable forward-compatibility (CASSANDRA-3478)
+ * report compression ratio in CFSMBean (CASSANDRA-3393)
 Merged from 0.8:
  * Make counter shard merging thread safe (CASSANDRA-3178)
  * fix updating CF row_cache_provider (CASSANDRA-3414)
@@ -18,7 +19,6 @@ Merged from 0.8:
  * Fix bug preventing the use of efficient cross-DC writes (CASSANDRA-3472)
  * `describe ring` command for CLI (CASSANDRA-3220)
 
-
 1.0.2
  * "defragment" rows for name-based queries under STCS (CASSANDRA-2503)
  * cleanup usage of StorageService.setMode() (CASANDRA-3388)

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Thu Nov 10 10:07:25 2011
@@ -1782,6 +1782,12 @@ public class ColumnFamilyStore implement
         return data.getEstimatedColumnCountHistogram();
     }
 
+    @Override
+    public double getCompressionRatio()
+    {
+        return data.getCompressionRatio();
+    }
+    
     /** true if this CFS contains secondary index data */
     public boolean isIndex()
     {

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStoreMBean.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStoreMBean.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStoreMBean.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/ColumnFamilyStoreMBean.java Thu Nov 10 10:07:25 2011
@@ -213,6 +213,7 @@ public interface ColumnFamilyStoreMBean
 
     public long[] getEstimatedRowSizeHistogram();
     public long[] getEstimatedColumnCountHistogram();
+    public double getCompressionRatio();
 
     /**
      * Returns a list of the names of the built column indexes for current store

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/DataTracker.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/DataTracker.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/DataTracker.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/db/DataTracker.java Thu Nov 10 10:07:25 2011
@@ -364,6 +364,21 @@ public class DataTracker
         return histogram;
     }
 
+    public double getCompressionRatio()
+    {
+        double sum = 0;
+        int total = 0;
+        for (SSTableReader sstable : getSSTables())
+        {
+            if (sstable.getCompressionRatio() != Double.MIN_VALUE)
+            {
+                sum += sstable.getCompressionRatio();
+                total++;
+            }
+        }
+        return total != 0 ? (double)sum/total: 0;
+    }
+
     public long getMinRowSize()
     {
         long min = 0;

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressedSequentialWriter.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressedSequentialWriter.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressedSequentialWriter.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/compress/CompressedSequentialWriter.java Thu Nov 10 10:07:25 2011
@@ -23,14 +23,15 @@ import java.io.IOException;
 import java.util.zip.CRC32;
 import java.util.zip.Checksum;
 
+import org.apache.cassandra.io.sstable.SSTableMetadata.Collector;
 import org.apache.cassandra.io.util.FileMark;
 import org.apache.cassandra.io.util.SequentialWriter;
 
 public class CompressedSequentialWriter extends SequentialWriter
 {
-    public static SequentialWriter open(String dataFilePath, String indexFilePath, boolean skipIOCache, CompressionParameters parameters) throws IOException
+    public static SequentialWriter open(String dataFilePath, String indexFilePath, boolean skipIOCache, CompressionParameters parameters, Collector sstableMetadataCollector) throws IOException
     {
-        return new CompressedSequentialWriter(new File(dataFilePath), indexFilePath, skipIOCache, parameters);
+        return new CompressedSequentialWriter(new File(dataFilePath), indexFilePath, skipIOCache, parameters, sstableMetadataCollector);
     }
 
     // holds offset in the file where current chunk should be written
@@ -49,7 +50,11 @@ public class CompressedSequentialWriter 
 
     private final Checksum checksum = new CRC32();
 
-    public CompressedSequentialWriter(File file, String indexFilePath, boolean skipIOCache, CompressionParameters parameters) throws IOException
+    private long originalSize = 0, compressedSize = 0;
+
+    private Collector sstableMetadataCollector;
+    
+    public CompressedSequentialWriter(File file, String indexFilePath, boolean skipIOCache, CompressionParameters parameters, Collector sstableMetadataCollector) throws IOException
     {
         super(file, parameters.chunkLength(), skipIOCache);
         this.compressor = parameters.sstableCompressor;
@@ -60,6 +65,7 @@ public class CompressedSequentialWriter 
         /* Index File (-CompressionInfo.db component) and it's header */
         metadataWriter = new CompressionMetadata.Writer(indexFilePath);
         metadataWriter.writeHeader(parameters);
+        this.sstableMetadataCollector = sstableMetadataCollector;
     }
 
     @Override
@@ -82,6 +88,9 @@ public class CompressedSequentialWriter 
         // compressing data with buffer re-use
         int compressedLength = compressor.compress(buffer, 0, validBufferBytes, compressed, 0);
 
+        originalSize += validBufferBytes;
+        compressedSize += compressedLength;
+        
         // update checksum
         checksum.update(buffer, 0, validBufferBytes);
 
@@ -179,7 +188,7 @@ public class CompressedSequentialWriter 
             return; // already closed
 
         super.close();
-
+        sstableMetadataCollector.addCompressionRatio(compressedSize, originalSize);
         metadataWriter.finalizeHeader(current, chunkCount);
         metadataWriter.close();
     }

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/Descriptor.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/Descriptor.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/Descriptor.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/Descriptor.java Thu Nov 10 10:07:25 2011
@@ -56,7 +56,7 @@ public class Descriptor
     // f (0.7.0): switched bloom filter implementations in data component
     // g (0.8): tracks flushed-at context in metadata component
     // h (1.0): tracks max client timestamp in metadata component
-    public static final String CURRENT_VERSION = "h";
+    public static final String CURRENT_VERSION = "hb";
 
     public final File directory;
     /** version has the following format: <code>[a-z]+</code> */
@@ -74,6 +74,7 @@ public class Descriptor
     public final boolean usesOldBloomFilter;
     public final boolean metadataIncludesReplayPosition;
     public final boolean tracksMaxTimestamp;
+    public final boolean hasCompressionRatio;
 
     public enum TempState
     {
@@ -115,6 +116,7 @@ public class Descriptor
         usesOldBloomFilter = version.compareTo("f") < 0;
         metadataIncludesReplayPosition = version.compareTo("g") >= 0;
         tracksMaxTimestamp = version.compareTo("h") >= 0;
+        hasCompressionRatio = version.compareTo("hb") >= 0;
         isLatestVersion = version.compareTo(CURRENT_VERSION) == 0;
     }
 

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableMetadata.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableMetadata.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableMetadata.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableMetadata.java Thu Nov 10 10:07:25 2011
@@ -51,19 +51,21 @@ public class SSTableMetadata
     protected final EstimatedHistogram estimatedColumnCount;
     protected final ReplayPosition replayPosition;
     protected final long maxTimestamp;
+    protected final double compressionRatio;
     public static final SSTableMetadataSerializer serializer = new SSTableMetadataSerializer();
 
     private SSTableMetadata()
     {
-        this(defaultRowSizeHistogram(), defaultColumnCountHistogram(), ReplayPosition.NONE, Long.MIN_VALUE);
+        this(defaultRowSizeHistogram(), defaultColumnCountHistogram(), ReplayPosition.NONE, Long.MIN_VALUE, Double.MIN_VALUE);
     }
 
-    private SSTableMetadata(EstimatedHistogram rowSizes, EstimatedHistogram columnCounts, ReplayPosition replayPosition, long maxTimestamp)
+    private SSTableMetadata(EstimatedHistogram rowSizes, EstimatedHistogram columnCounts, ReplayPosition replayPosition, long maxTimestamp, double cr)
     {
         this.estimatedRowSize = rowSizes;
         this.estimatedColumnCount = columnCounts;
         this.replayPosition = replayPosition;
         this.maxTimestamp = maxTimestamp;
+        this.compressionRatio = cr;
     }
 
     public static SSTableMetadata createDefaultInstance()
@@ -96,6 +98,11 @@ public class SSTableMetadata
         return maxTimestamp;
     }
 
+    public double getCompressionRatio()
+    {
+        return compressionRatio;
+    }
+
     static EstimatedHistogram defaultColumnCountHistogram()
     {
         // EH of 114 can track a max value of 2395318855, i.e., > 2B columns
@@ -114,6 +121,7 @@ public class SSTableMetadata
         protected EstimatedHistogram estimatedColumnCount;
         protected ReplayPosition replayPosition;
         protected long maxTimestamp;
+        protected double compressionRatio;
 
         private Collector()
         {
@@ -121,6 +129,7 @@ public class SSTableMetadata
             this.estimatedColumnCount = defaultColumnCountHistogram();
             this.replayPosition = ReplayPosition.NONE;
             this.maxTimestamp = Long.MIN_VALUE;
+            this.compressionRatio = Double.MIN_VALUE;
         }
 
         public void addRowSize(long rowSize)
@@ -133,6 +142,15 @@ public class SSTableMetadata
             estimatedColumnCount.add(columnCount);
         }
 
+        /**
+         * Ratio is compressed/uncompressed, so lower is better;
+         * a value of 1.0 or more means compression isn't helping
+         */
+        public void addCompressionRatio(long compressed, long uncompressed)
+        {
+            compressionRatio = (double) compressed/uncompressed;
+        }
+        
         public void updateMaxTimestamp(long potentialMax)
         {
             maxTimestamp = Math.max(maxTimestamp, potentialMax);
@@ -140,7 +158,7 @@ public class SSTableMetadata
 
         public SSTableMetadata finalizeMetadata()
         {
-            return new SSTableMetadata(estimatedRowSize, estimatedColumnCount, replayPosition, maxTimestamp);
+            return new SSTableMetadata(estimatedRowSize, estimatedColumnCount, replayPosition, maxTimestamp, compressionRatio);
         }
 
         public Collector estimatedRowSize(EstimatedHistogram estimatedRowSize)
@@ -172,6 +190,7 @@ public class SSTableMetadata
             EstimatedHistogram.serializer.serialize(sstableStats.getEstimatedColumnCount(), dos);
             ReplayPosition.serializer.serialize(sstableStats.getReplayPosition(), dos);
             dos.writeLong(sstableStats.getMaxTimestamp());
+            dos.writeDouble(sstableStats.getCompressionRatio());
         }
 
         public SSTableMetadata deserialize(Descriptor descriptor) throws IOException
@@ -187,7 +206,7 @@ public class SSTableMetadata
             DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(statsFile)));
             try
             {
-                return deserialize(dis, descriptor.metadataIncludesReplayPosition, descriptor.tracksMaxTimestamp);
+                return deserialize(dis, descriptor);
             }
             finally
             {
@@ -195,16 +214,18 @@ public class SSTableMetadata
             }
         }
 
-        public SSTableMetadata deserialize(DataInputStream dis, boolean includesReplayPosition, boolean tracksMaxTimestamp) throws IOException
+        public SSTableMetadata deserialize(DataInputStream dis, Descriptor desc) throws IOException
         {
             EstimatedHistogram rowSizes = EstimatedHistogram.serializer.deserialize(dis);
             EstimatedHistogram columnCounts = EstimatedHistogram.serializer.deserialize(dis);
-            ReplayPosition replayPosition = includesReplayPosition
+            ReplayPosition replayPosition = desc.metadataIncludesReplayPosition
                                           ? ReplayPosition.serializer.deserialize(dis)
                                           : ReplayPosition.NONE;
-            long maxTimestamp = tracksMaxTimestamp ? dis.readLong() : Long.MIN_VALUE;
-
-            return new SSTableMetadata(rowSizes, columnCounts, replayPosition, maxTimestamp);
+            long maxTimestamp = desc.tracksMaxTimestamp ? dis.readLong() : Long.MIN_VALUE;
+            double compressionRatio = desc.hasCompressionRatio
+                                        ? dis.readDouble()
+                                        : Double.MIN_VALUE;
+            return new SSTableMetadata(rowSizes, columnCounts, replayPosition, maxTimestamp, compressionRatio);
         }
     }
 }

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableReader.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableReader.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableReader.java Thu Nov 10 10:07:25 2011
@@ -894,6 +894,11 @@ public class SSTableReader extends SSTab
         return sstableMetadata.getEstimatedColumnCount();
     }
 
+    public double getCompressionRatio()
+    {
+        return sstableMetadata.getCompressionRatio();
+    }
+
     public ReplayPosition getReplayPosition()
     {
         return sstableMetadata.getReplayPosition();

Modified: cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java (original)
+++ cassandra/branches/cassandra-1.0/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java Thu Nov 10 10:07:25 2011
@@ -89,7 +89,8 @@ public class SSTableWriter extends SSTab
             dataFile = CompressedSequentialWriter.open(getFilename(),
                                                        descriptor.filenameFor(Component.COMPRESSION_INFO),
                                                        true,
-                                                       metadata.compressionParameters());
+                                                       metadata.compressionParameters(),
+                                                       sstableMetadataCollector);
         }
         else
         {

Modified: cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/compress/CompressedRandomAccessReaderTest.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/compress/CompressedRandomAccessReaderTest.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/compress/CompressedRandomAccessReaderTest.java (original)
+++ cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/compress/CompressedRandomAccessReaderTest.java Thu Nov 10 10:07:25 2011
@@ -24,6 +24,7 @@ import java.util.concurrent.Callable;
 
 import org.junit.Test;
 
+import org.apache.cassandra.io.sstable.SSTableMetadata;
 import org.apache.cassandra.io.util.*;
 
 import static org.junit.Assert.assertEquals;
@@ -54,8 +55,9 @@ public class CompressedRandomAccessReade
 
         try
         {
+            SSTableMetadata.Collector sstableMetadataCollector = SSTableMetadata.createCollector().replayPosition(null);
             SequentialWriter writer = compressed
-                ? new CompressedSequentialWriter(f, filename + ".metadata", false, new CompressionParameters(SnappyCompressor.instance))
+                ? new CompressedSequentialWriter(f, filename + ".metadata", false, new CompressionParameters(SnappyCompressor.instance), sstableMetadataCollector)
                 : new SequentialWriter(f, CompressionParameters.DEFAULT_CHUNK_LENGTH, false);
 
             writer.write("The quick ".getBytes());
@@ -104,7 +106,8 @@ public class CompressedRandomAccessReade
         File metadata = new File(file.getPath() + ".meta");
         metadata.deleteOnExit();
 
-        SequentialWriter writer = new CompressedSequentialWriter(file, metadata.getPath(), false, new CompressionParameters(SnappyCompressor.instance));
+        SSTableMetadata.Collector sstableMetadataCollector = SSTableMetadata.createCollector().replayPosition(null);
+        SequentialWriter writer = new CompressedSequentialWriter(file, metadata.getPath(), false, new CompressionParameters(SnappyCompressor.instance), sstableMetadataCollector);
 
         writer.write(CONTENT.getBytes());
         writer.close();

Modified: cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataSerializerTest.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataSerializerTest.java?rev=1200251&r1=1200250&r2=1200251&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataSerializerTest.java (original)
+++ cassandra/branches/cassandra-1.0/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataSerializerTest.java Thu Nov 10 10:07:25 2011
@@ -22,6 +22,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayInputStream;
 import java.io.DataOutputStream;
 import java.io.DataInputStream;
+import java.io.File;
 import java.io.IOException;
 
 import org.junit.Test;
@@ -58,7 +59,8 @@ public class SSTableMetadataSerializerTe
 
         ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
         DataInputStream dis = new DataInputStream(byteInput);
-        SSTableMetadata stats = SSTableMetadata.serializer.deserialize(dis, true, true);
+        Descriptor desc = new Descriptor(Descriptor.CURRENT_VERSION, new File("."), "", "", 0, false);
+        SSTableMetadata stats = SSTableMetadata.serializer.deserialize(dis, desc);
 
         assert stats.getEstimatedRowSize().equals(originalMetadata.getEstimatedRowSize());
         assert stats.getEstimatedRowSize().equals(rowSizes);