You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2018/05/03 18:14:42 UTC
kudu git commit: tool: improve format for dumping a rowset
Repository: kudu
Updated Branches:
refs/heads/master eded05723 -> d19844f33
tool: improve format for dumping a rowset
This changes the output for 'kudu local_replica dump rowset' to be more
human-readable. The output now looks like this:
RowIdxInBlock: 0; Base: (int32 key=0, int32 int_val=0, string string_val="HelloWorld"); Undo Mutations: [@1(DELETE)]; Redo Mutations: [];
RowIdxInBlock: 1; Base: (int32 key=1, int32 int_val=10, string string_val="HelloWorld"); Undo Mutations: [@2(DELETE)]; Redo Mutations: [];
RowIdxInBlock: 2; Base: (int32 key=2, int32 int_val=20, string string_val="HelloWorld"); Undo Mutations: [@3(DELETE)]; Redo Mutations: [];
...
rather than separately dumping each column block. Dumping individual blocks is
still possible by using the cfile dump commands.
Change-Id: I0f1d08e08d2a3d20a87e49bb5338bf0585bd8e40
Reviewed-on: http://gerrit.cloudera.org:8080/3946
Tested-by: Todd Lipcon <to...@apache.org>
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d19844f3
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d19844f3
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d19844f3
Branch: refs/heads/master
Commit: d19844f33817c511476746b7a7ec07e6826d52a0
Parents: eded057
Author: Todd Lipcon <to...@apache.org>
Authored: Mon Apr 23 18:03:24 2018 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu May 3 18:05:27 2018 +0000
----------------------------------------------------------------------
src/kudu/tools/kudu-tool-test.cc | 10 +-
src/kudu/tools/tool_action_local_replica.cc | 230 ++---------------------
2 files changed, 23 insertions(+), 217 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/d19844f3/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index d436f9e..fcd4e0f 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1145,11 +1145,11 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
ASSERT_STR_CONTAINS(stdout, "bloom_block {");
ASSERT_STR_MATCHES(stdout, "id: .*");
ASSERT_STR_CONTAINS(stdout, "undo_deltas {");
- ASSERT_STR_MATCHES(stdout, "CFile Header: ");
- ASSERT_STR_MATCHES(stdout, "Delta stats:.*");
- ASSERT_STR_MATCHES(stdout, "ts range=.*");
- ASSERT_STR_MATCHES(stdout, "update_counts_by_col_id=.*");
- ASSERT_STR_MATCHES(stdout, "Dumping column block.*for column id.*");
+
+ ASSERT_STR_CONTAINS(stdout,
+ "RowIdxInBlock: 0; Base: (int32 key=0, int32 int_val=0,"
+ " string string_val=\"HelloWorld\"); "
+ "Undo Mutations: [@1(DELETE)]; Redo Mutations: [];");
ASSERT_STR_MATCHES(stdout, ".*---------------------.*");
// This is expected to fail with Invalid argument for kRowId.
http://git-wip-us.apache.org/repos/asf/kudu/blob/d19844f3/src/kudu/tools/tool_action_local_replica.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index ce6a951..c65e51e 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -15,13 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-#include "kudu/tools/tool_action.h"
-
-#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
-#include <list>
#include <map>
#include <memory>
#include <string>
@@ -34,19 +30,15 @@
#include <gflags/gflags.h>
#include <glog/logging.h>
-#include "kudu/cfile/cfile.pb.h"
-#include "kudu/cfile/cfile_reader.h"
-#include "kudu/cfile/cfile_util.h"
#include "kudu/common/common.pb.h"
#include "kudu/common/partition.h"
-#include "kudu/common/row_changelist.h"
-#include "kudu/common/rowblock.h"
#include "kudu/common/schema.h"
#include "kudu/common/wire_protocol.h"
#include "kudu/consensus/consensus.pb.h"
#include "kudu/consensus/consensus_meta.h"
#include "kudu/consensus/consensus_meta_manager.h"
#include "kudu/consensus/log.pb.h"
+#include "kudu/consensus/log_anchor_registry.h"
#include "kudu/consensus/log_index.h"
#include "kudu/consensus/log_reader.h"
#include "kudu/consensus/log_util.h"
@@ -55,35 +47,28 @@
#include "kudu/fs/block_id.h"
#include "kudu/fs/block_manager.h"
#include "kudu/fs/fs_manager.h"
-#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/strings/human_readable.h"
#include "kudu/gutil/strings/join.h"
#include "kudu/gutil/strings/numbers.h"
-#include "kudu/gutil/strings/split.h"
#include "kudu/gutil/strings/stringpiece.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/gutil/strings/util.h"
#include "kudu/master/sys_catalog.h"
#include "kudu/rpc/messenger.h"
-#include "kudu/tablet/cfile_set.h"
-#include "kudu/tablet/delta_key.h"
-#include "kudu/tablet/delta_stats.h"
-#include "kudu/tablet/delta_store.h"
-#include "kudu/tablet/deltafile.h"
+#include "kudu/tablet/diskrowset.h"
#include "kudu/tablet/metadata.pb.h"
-#include "kudu/tablet/mvcc.h"
#include "kudu/tablet/rowset_metadata.h"
+#include "kudu/tablet/tablet_mem_trackers.h"
#include "kudu/tablet/tablet_metadata.h"
#include "kudu/tablet/tablet_replica.h"
+#include "kudu/tools/tool_action.h"
#include "kudu/tools/tool_action_common.h"
#include "kudu/tserver/tablet_copy_client.h"
#include "kudu/tserver/ts_tablet_manager.h"
#include "kudu/util/env.h"
#include "kudu/util/env_util.h"
-#include "kudu/util/mem_tracker.h"
-#include "kudu/util/memory/arena.h"
#include "kudu/util/metrics.h"
#include "kudu/util/net/net_util.h"
#include "kudu/util/pb_util.h"
@@ -108,10 +93,6 @@ DEFINE_bool(clean_unsafe, false,
namespace kudu {
namespace tools {
-using cfile::CFileIterator;
-using cfile::CFileReader;
-using cfile::DumpIterator;
-using cfile::ReaderOptions;
using consensus::ConsensusMetadata;
using consensus::ConsensusMetadataManager;
using consensus::OpId;
@@ -128,21 +109,14 @@ using rpc::Messenger;
using rpc::MessengerBuilder;
using std::cout;
using std::endl;
-using std::list;
using std::map;
using std::pair;
using std::shared_ptr;
using std::string;
using std::unique_ptr;
using std::vector;
-using strings::Split;
using strings::Substitute;
-using tablet::CFileSet;
-using tablet::DeltaFileReader;
-using tablet::DeltaIterator;
-using tablet::DeltaKeyAndUpdate;
-using tablet::DeltaType;
-using tablet::MvccSnapshot;
+using tablet::DiskRowSet;
using tablet::RowSetMetadata;
using tablet::TabletMetadata;
using tablet::TabletDataState;
@@ -645,141 +619,7 @@ Status ListLocalReplicas(const RunnerContext& context) {
return Status::OK();
}
-Status DumpCFileBlockInternal(FsManager* fs_manager,
- const BlockId& block_id,
- int indent) {
- unique_ptr<ReadableBlock> block;
- RETURN_NOT_OK(fs_manager->OpenBlock(block_id, &block));
- unique_ptr<CFileReader> reader;
- RETURN_NOT_OK(CFileReader::Open(std::move(block), ReaderOptions(), &reader));
-
- cout << Indent(indent) << "CFile Header: "
- << pb_util::SecureShortDebugString(reader->header()) << endl;
- if (!FLAGS_dump_data) {
- return Status::OK();
- }
- cout << Indent(indent) << reader->footer().num_values()
- << " values:" << endl;
-
- gscoped_ptr<CFileIterator> it;
- RETURN_NOT_OK(reader->NewIterator(&it, CFileReader::DONT_CACHE_BLOCK));
- RETURN_NOT_OK(it->SeekToFirst());
- return DumpIterator(*reader, it.get(), &cout, FLAGS_nrows, indent + 2);
-}
-
-Status DumpDeltaCFileBlockInternal(FsManager* fs_manager,
- const Schema& schema,
- const shared_ptr<RowSetMetadata>& rs_meta,
- const BlockId& block_id,
- DeltaType delta_type,
- int indent) {
- // Open the delta reader
- unique_ptr<ReadableBlock> readable_block;
- RETURN_NOT_OK(fs_manager->OpenBlock(block_id, &readable_block));
- shared_ptr<DeltaFileReader> delta_reader;
- RETURN_NOT_OK(DeltaFileReader::Open(std::move(readable_block),
- delta_type,
- ReaderOptions(),
- &delta_reader));
-
- cout << Indent(indent) << "Delta stats: "
- << delta_reader->delta_stats().ToString() << endl;
- if (FLAGS_metadata_only) {
- return Status::OK();
- }
-
- // Create the delta iterator.
- // TODO: see if it's worth re-factoring NewDeltaIterator to return a
- // gscoped_ptr that can then be released if we need a raw or shared
- // pointer.
- DeltaIterator* raw_iter;
-
- MvccSnapshot snap_all;
- if (delta_type == tablet::REDO) {
- snap_all = MvccSnapshot::CreateSnapshotIncludingAllTransactions();
- } else if (delta_type == tablet::UNDO) {
- snap_all = MvccSnapshot::CreateSnapshotIncludingNoTransactions();
- }
-
- Status s = delta_reader->NewDeltaIterator(&schema, snap_all, &raw_iter);
-
- if (s.IsNotFound()) {
- cout << "Empty delta block." << endl;
- return Status::OK();
- }
- RETURN_NOT_OK(s);
-
- // NewDeltaIterator returns Status::OK() iff a new DeltaIterator is created. Thus,
- // it's safe to have a unique_ptr take possesion of 'raw_iter' here.
- unique_ptr<DeltaIterator> delta_iter(raw_iter);
- RETURN_NOT_OK(delta_iter->Init(NULL));
- RETURN_NOT_OK(delta_iter->SeekToOrdinal(0));
-
- // TODO: it's awkward that whenever we want to iterate over deltas we also
- // need to open the CFileSet for the rowset. Ideally, we should use
- // information stored in the footer/store additional information in the
- // footer as to make it feasible iterate over all deltas using a
- // DeltaFileIterator alone.
- shared_ptr<CFileSet> cfileset;
- RETURN_NOT_OK(CFileSet::Open(rs_meta, MemTracker::GetRootTracker(), &cfileset));
- gscoped_ptr<CFileSet::Iterator> cfileset_iter(cfileset->NewIterator(&schema));
-
- RETURN_NOT_OK(cfileset_iter->Init(NULL));
-
- const size_t kRowsPerBlock = 100;
- size_t nrows = 0;
- size_t ndeltas = 0;
- Arena arena(32 * 1024);
- RowBlock block(schema, kRowsPerBlock, &arena);
-
- // See tablet/delta_compaction.cc to understand why this loop is structured the way
- // it is.
- while (cfileset_iter->HasNext()) {
- size_t n;
- if (FLAGS_nrows > 0) {
- // Note: number of deltas may not equal the number of rows, but
- // since this is a CLI tool (and the nrows option exists
- // primarily to limit copious output) it's okay not to be
- // exact here.
- size_t remaining = FLAGS_nrows - nrows;
- if (remaining == 0) break;
- n = std::min(remaining, kRowsPerBlock);
- } else {
- n = kRowsPerBlock;
- }
-
- arena.Reset();
- cfileset_iter->PrepareBatch(&n);
-
- block.Resize(n);
-
- RETURN_NOT_OK(delta_iter->PrepareBatch(
- n, DeltaIterator::PREPARE_FOR_COLLECT));
- vector<DeltaKeyAndUpdate> out;
- RETURN_NOT_OK(
- delta_iter->FilterColumnIdsAndCollectDeltas(vector<ColumnId>(),
- &out,
- &arena));
- for (const DeltaKeyAndUpdate& upd : out) {
- if (FLAGS_dump_data) {
- cout << Indent(indent) << upd.key.ToString() << " "
- << RowChangeList(upd.cell).ToString(schema) << endl;
- ++ndeltas;
- }
- }
- RETURN_NOT_OK(cfileset_iter->FinishBatch());
-
- nrows += n;
- }
-
- VLOG(1) << "Processed " << ndeltas << " deltas, for total of "
- << nrows << " possible rows.";
- return Status::OK();
-}
-
-Status DumpRowSetInternal(FsManager* fs_manager,
- const Schema& schema,
- const shared_ptr<RowSetMetadata>& rs_meta,
+Status DumpRowSetInternal(const shared_ptr<RowSetMetadata>& rs_meta,
int indent) {
tablet::RowSetDataPB pb;
rs_meta->ToProtobuf(&pb);
@@ -787,48 +627,16 @@ Status DumpRowSetInternal(FsManager* fs_manager,
cout << Indent(indent) << "RowSet metadata: " << pb_util::SecureDebugString(pb)
<< endl << endl;
- RowSetMetadata::ColumnIdToBlockIdMap col_blocks =
- rs_meta->GetColumnBlocksById();
- for (const RowSetMetadata::ColumnIdToBlockIdMap::value_type& e :
- col_blocks) {
- ColumnId col_id = e.first;
- const BlockId& block_id = e.second;
-
- cout << Indent(indent) << "Dumping column block " << block_id
- << " for column id " << col_id;
- int col_idx = schema.find_column_by_id(col_id);
- if (col_idx != -1) {
- cout << "( " << schema.column(col_idx).ToString() << ")";
- }
- cout << ":" << endl;
- cout << Indent(indent) << kSeparatorLine;
- if (FLAGS_metadata_only) continue;
- RETURN_NOT_OK(DumpCFileBlockInternal(fs_manager, block_id, indent));
- cout << endl;
- }
-
- for (const BlockId& block : rs_meta->undo_delta_blocks()) {
- cout << Indent(indent) << "Dumping undo delta block " << block << ":"
- << endl << Indent(indent) << kSeparatorLine;
- RETURN_NOT_OK(DumpDeltaCFileBlockInternal(fs_manager,
- schema,
- rs_meta,
- block,
- tablet::UNDO,
- indent));
- cout << endl;
- }
-
- for (const BlockId& block : rs_meta->redo_delta_blocks()) {
- cout << Indent(indent) << "Dumping redo delta block " << block << ":"
- << endl << Indent(indent) << kSeparatorLine;
- RETURN_NOT_OK(DumpDeltaCFileBlockInternal(fs_manager,
- schema,
- rs_meta,
- block,
- tablet::REDO,
- indent));
- cout << endl;
+ scoped_refptr<log::LogAnchorRegistry> log_reg(new log::LogAnchorRegistry());
+ shared_ptr<DiskRowSet> rs;
+ RETURN_NOT_OK(DiskRowSet::Open(rs_meta,
+ log_reg.get(),
+ tablet::TabletMemTrackers(),
+ &rs));
+ vector<string> lines;
+ RETURN_NOT_OK(rs->DebugDump(&lines));
+ for (const auto& l : lines) {
+ cout << l << endl;
}
return Status::OK();
@@ -851,8 +659,7 @@ Status DumpRowSet(const RunnerContext& context) {
if (FLAGS_rowset_index != -1) {
for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets()) {
if (rs_meta->id() == FLAGS_rowset_index) {
- return DumpRowSetInternal(fs_manager.get(), meta->schema(),
- rs_meta, 0);
+ return DumpRowSetInternal(rs_meta, 0);
}
}
return Status::InvalidArgument(
@@ -864,8 +671,7 @@ Status DumpRowSet(const RunnerContext& context) {
size_t idx = 0;
for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets()) {
cout << endl << "Dumping rowset " << idx++ << endl << kSeparatorLine;
- RETURN_NOT_OK(DumpRowSetInternal(fs_manager.get(), meta->schema(),
- rs_meta, 2));
+ RETURN_NOT_OK(DumpRowSetInternal(rs_meta, 2));
}
return Status::OK();
}