You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/12/08 23:42:12 UTC

[2/2] kudu git commit: KUDU-1524. Add a workaround for unflushable large cells

KUDU-1524. Add a workaround for unflushable large cells

Previously, we had a hard-coded limit of 16MB for an individual cfile
block. This would cause a CHECK failure if someone inserted a cell
larger than this size.

We should probably limit large cells in the write path in a separate
patch, but it was also a bad idea to have this limit be a constant
instead of an 'unsafe' flag. This switches to using a flag for the value
so that, if we do end up in a situation like this, we can work around it
by bumping the flag instead of recompiling.

This also fixes the size limiting to be symmetric: we now always check
the size of the *uncompressed* block, which ensures that if we're able
to write a block, we will later be able to read it.

Change-Id: I245b52f2bc8b9d95716cacd340dca93f64846c73
Reviewed-on: http://gerrit.cloudera.org:8080/5282
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <da...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/e1acdfba
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/e1acdfba
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/e1acdfba

Branch: refs/heads/master
Commit: e1acdfba66913819f9c5d604ad5449e1eca83766
Parents: 9854857
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Nov 30 12:23:38 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Dec 8 23:38:14 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/block_compression.cc | 48 ++++++++++++++++++++------------
 src/kudu/cfile/block_compression.h  |  6 ++--
 src/kudu/cfile/cfile_reader.cc      |  4 +--
 src/kudu/cfile/cfile_writer.cc      |  3 +-
 4 files changed, 34 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/e1acdfba/src/kudu/cfile/block_compression.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/block_compression.cc b/src/kudu/cfile/block_compression.cc
index 61792b7..de43ae9 100644
--- a/src/kudu/cfile/block_compression.cc
+++ b/src/kudu/cfile/block_compression.cc
@@ -17,22 +17,32 @@
 
 #include <glog/logging.h>
 #include <algorithm>
+#include <gflags/gflags.h>
 
 #include "kudu/cfile/block_compression.h"
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/stringprintf.h"
-#include "kudu/util/coding.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/coding-inl.h"
+#include "kudu/util/coding.h"
+#include "kudu/util/flag_tags.h"
+
+DEFINE_int64(max_cfile_block_size, 16 * 1024 * 1024,
+             "The maximum size of an uncompressed CFile block when using compression. "
+             "Blocks larger than this will prevent flushing.");
+// Mark this flag unsafe since we're likely to hit other downstream issues with
+// cells that are this large, and we haven't tested these scenarios. The purpose
+// of this flag is just to provide an 'escape hatch' if we somehow insert a too-large
+// value.
+TAG_FLAG(max_cfile_block_size, unsafe);
 
 namespace kudu {
 namespace cfile {
 
 using std::vector;
 
-CompressedBlockBuilder::CompressedBlockBuilder(const CompressionCodec* codec,
-                                               size_t size_limit)
-  : codec_(DCHECK_NOTNULL(codec)),
-    compressed_size_limit_(size_limit) {
+CompressedBlockBuilder::CompressedBlockBuilder(const CompressionCodec* codec)
+  : codec_(DCHECK_NOTNULL(codec)) {
 }
 
 Status CompressedBlockBuilder::Compress(const Slice& data, Slice *result) {
@@ -47,15 +57,19 @@ Status CompressedBlockBuilder::Compress(const vector<Slice> &data_slices, Slice
     data_size += data.size();
   }
 
-  // Ensure that the buffer for header + compressed data is large enough
-  size_t max_compressed_size = codec_->MaxCompressedLength(data_size);
-  if (max_compressed_size > compressed_size_limit_) {
-    return Status::InvalidArgument(
-      StringPrintf("estimated max size %lu is greater than the expected %lu",
-        max_compressed_size, compressed_size_limit_));
+  // On the read side, we won't read any data which uncompresses larger than the
+  // configured maximum. So, we should prevent writing any data which would later
+  // be unreadable.
+  if (data_size > FLAGS_max_cfile_block_size) {
+    return Status::InvalidArgument(strings::Substitute(
+        "uncompressed block size $0 is greater than the configured maximum "
+        "size $1", data_size, FLAGS_max_cfile_block_size));
   }
 
-  buffer_.resize(kHeaderReservedLength + max_compressed_size);
+  // Ensure that the buffer for header + compressed data is large enough
+  // for the upper bound compressed size reported by the codec.
+  size_t ub_compressed_size = codec_->MaxCompressedLength(data_size);
+  buffer_.resize(kHeaderReservedLength + ub_compressed_size);
 
   // Compress
   size_t compressed_size;
@@ -70,10 +84,8 @@ Status CompressedBlockBuilder::Compress(const vector<Slice> &data_slices, Slice
   return Status::OK();
 }
 
-CompressedBlockDecoder::CompressedBlockDecoder(const CompressionCodec* codec,
-                                               size_t size_limit)
-  : codec_(DCHECK_NOTNULL(codec)),
-    uncompressed_size_limit_(size_limit) {
+CompressedBlockDecoder::CompressedBlockDecoder(const CompressionCodec* codec)
+  : codec_(DCHECK_NOTNULL(codec)) {
 }
 
 Status CompressedBlockDecoder::ValidateHeader(const Slice& data, uint32_t *uncompressed_size) {
@@ -99,10 +111,10 @@ Status CompressedBlockDecoder::ValidateHeader(const Slice& data, uint32_t *uncom
   }
 
   // Check if uncompressed size seems to be reasonable
-  if (*uncompressed_size > uncompressed_size_limit_) {
+  if (*uncompressed_size > FLAGS_max_cfile_block_size) {
     return Status::Corruption(
       StringPrintf("uncompressed size %u overflows the maximum length %lu, buffer",
-        compressed_size, uncompressed_size_limit_), data.ToDebugString(50));
+        compressed_size, FLAGS_max_cfile_block_size), data.ToDebugString(50));
   }
 
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/kudu/blob/e1acdfba/src/kudu/cfile/block_compression.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/block_compression.h b/src/kudu/cfile/block_compression.h
index 14c7bff..4316de3 100644
--- a/src/kudu/cfile/block_compression.h
+++ b/src/kudu/cfile/block_compression.h
@@ -34,7 +34,7 @@ namespace cfile {
 class CompressedBlockBuilder {
  public:
   // 'codec' is expected to remain alive for the lifetime of this object.
-  CompressedBlockBuilder(const CompressionCodec* codec, size_t size_limit);
+  explicit CompressedBlockBuilder(const CompressionCodec* codec);
 
   // Sets "*result" to the compressed version of the "data".
   // The data inside the result is owned by the CompressedBlockBuilder class
@@ -51,13 +51,12 @@ class CompressedBlockBuilder {
   DISALLOW_COPY_AND_ASSIGN(CompressedBlockBuilder);
   const CompressionCodec* codec_;
   faststring buffer_;
-  size_t compressed_size_limit_;
 };
 
 class CompressedBlockDecoder {
  public:
   // 'codec' is expected to remain alive for the lifetime of this object.
-  CompressedBlockDecoder(const CompressionCodec* codec, size_t size_limit);
+  explicit CompressedBlockDecoder(const CompressionCodec* codec);
 
   // Sets "*result" to the uncompressed version of the "data".
   // It is the caller's responsibility to free the result data.
@@ -85,7 +84,6 @@ class CompressedBlockDecoder {
  private:
   DISALLOW_COPY_AND_ASSIGN(CompressedBlockDecoder);
   const CompressionCodec* codec_;
-  size_t uncompressed_size_limit_;
 };
 
 } // namespace cfile

http://git-wip-us.apache.org/repos/asf/kudu/blob/e1acdfba/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index d2ec70b..077d6f4 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -60,8 +60,6 @@ const char* CFILE_CACHE_HIT_BYTES_METRIC_NAME = "cfile_cache_hit_bytes";
 static const size_t kMagicAndLengthSize = 12;
 static const size_t kMaxHeaderFooterPBSize = 64*1024;
 
-static const size_t kBlockSizeLimit = 16 * 1024 * 1024; // 16MB
-
 static Status ParseMagicAndLength(const Slice &data,
                                   uint32_t *parsed_len) {
   if (data.size() != kMagicAndLengthSize) {
@@ -213,7 +211,7 @@ Status CFileReader::ReadAndParseFooter() {
   if (footer_->compression() != NO_COMPRESSION) {
     const CompressionCodec* codec;
     RETURN_NOT_OK(GetCompressionCodec(footer_->compression(), &codec));
-    block_uncompressor_.reset(new CompressedBlockDecoder(codec, kBlockSizeLimit));
+    block_uncompressor_.reset(new CompressedBlockDecoder(codec));
   }
 
   VLOG(2) << "Read footer: " << footer_->DebugString();

http://git-wip-us.apache.org/repos/asf/kudu/blob/e1acdfba/src/kudu/cfile/cfile_writer.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_writer.cc b/src/kudu/cfile/cfile_writer.cc
index 018b309..578d857 100644
--- a/src/kudu/cfile/cfile_writer.cc
+++ b/src/kudu/cfile/cfile_writer.cc
@@ -69,7 +69,6 @@ namespace cfile {
 
 const char kMagicString[] = "kuducfil";
 
-static const size_t kBlockSizeLimit = 16 * 1024 * 1024; // 16MB
 static const size_t kMinBlockSize = 512;
 
 static CompressionType GetDefaultCompressionCodec() {
@@ -153,7 +152,7 @@ Status CFileWriter::Start() {
   if (compression_ != NO_COMPRESSION) {
     const CompressionCodec* codec;
     RETURN_NOT_OK(GetCompressionCodec(compression_, &codec));
-    block_compressor_ .reset(new CompressedBlockBuilder(codec, kBlockSizeLimit));
+    block_compressor_ .reset(new CompressedBlockBuilder(codec));
   }
 
   CFileHeaderPB header;