You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zg...@apache.org on 2019/03/12 12:45:49 UTC
[hbase] 61/133: HBASE-17278 [C++] Cell Scanner and KeyValueCodec
for encoding cells in RPC (Sudeep Sunthankar and enis)
This is an automated email from the ASF dual-hosted git repository.
zghao pushed a commit to branch HBASE-14850
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 9a99925333c63215ef1c70948ce99dc06c8503d3
Author: Enis Soztutar <en...@apache.org>
AuthorDate: Thu Feb 9 11:26:47 2017 -0800
HBASE-17278 [C++] Cell Scanner and KeyValueCodec for encoding cells in RPC (Sudeep Sunthankar and enis)
---
hbase-native-client/.gitignore | 3 +-
hbase-native-client/core/BUCK | 2 +
hbase-native-client/core/keyvalue-codec.cc | 69 ++++++++++++
hbase-native-client/core/keyvalue-codec.h | 147 ++++++++++++++++++++++++++
hbase-native-client/serde/BUCK | 3 +
hbase-native-client/serde/cell-outputstream.h | 53 ++++++++++
hbase-native-client/serde/cell-scanner.h | 50 +++++++++
hbase-native-client/serde/codec.h | 50 +++++++++
8 files changed, 376 insertions(+), 1 deletion(-)
diff --git a/hbase-native-client/.gitignore b/hbase-native-client/.gitignore
index b172fe9..c0032e4 100644
--- a/hbase-native-client/.gitignore
+++ b/hbase-native-client/.gitignore
@@ -22,4 +22,5 @@ buck-out
*.swp
# Thirdparty dirs
-third-party/googletest*
+third-party/*
+/gcc-debug/
diff --git a/hbase-native-client/core/BUCK b/hbase-native-client/core/BUCK
index f1880a4..d8d15a9 100644
--- a/hbase-native-client/core/BUCK
+++ b/hbase-native-client/core/BUCK
@@ -22,6 +22,7 @@ cxx_library(
"client.h",
"cell.h",
"hbase_macros.h",
+ "keyvalue-codec.h",
"region-location.h",
"location-cache.h",
# TODO: move this out of exported
@@ -40,6 +41,7 @@ cxx_library(
srcs=[
"cell.cc",
"client.cc",
+ "keyvalue-codec.cc",
"location-cache.cc",
"meta-utils.cc",
"get.cc",
diff --git a/hbase-native-client/core/keyvalue-codec.cc b/hbase-native-client/core/keyvalue-codec.cc
new file mode 100644
index 0000000..1b526b9
--- /dev/null
+++ b/hbase-native-client/core/keyvalue-codec.cc
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "core/keyvalue-codec.h"
+
+#include <stdexcept>
+#include <string>
+
+namespace hbase {
+
+KeyValueCodec::KVDecoder::KVDecoder(std::unique_ptr<folly::IOBuf> cell_block, uint32_t offset,
+ uint32_t length)
+ : cell_block_(std::move(cell_block)), offset_(offset), length_(length) {}
+
+// The destructor has no work of its own to do, so the compiler-generated one is used.
+KeyValueCodec::KVDecoder::~KVDecoder() = default;
+
+/**
+ * @brief Decodes a single cell starting at the cursor position.
+ *
+ * Fields are read in this order, every integer big-endian:
+ *   key length (4) | value length (4) | row length (2) | row | family length (1) | family |
+ *   qualifier | timestamp (8) | key type (1) | value
+ * The qualifier has no length field of its own: its size is whatever is left of the key once
+ * the row, the family and the fixed-size key fields have been subtracted.
+ *
+ * @param cursor read position inside the cell block; it is advanced past the decoded cell
+ * @return the decoded Cell
+ * @throws std::out_of_range if the key length is too small to hold the row, the family and the
+ * fixed-size key fields
+ */
+std::shared_ptr<Cell> KeyValueCodec::KVDecoder::Decode(folly::io::Cursor &cursor) {
+  const uint32_t key_length = cursor.readBE<uint32_t>();
+  const uint32_t value_length = cursor.readBE<uint32_t>();
+  const uint16_t row_length = cursor.readBE<uint16_t>();
+  std::string row = cursor.readFixedString(row_length);
+  const uint8_t column_family_length = cursor.readBE<uint8_t>();
+  std::string column_family = cursor.readFixedString(column_family_length);
+
+  // Check before subtracting. The arithmetic is unsigned, so a key length that is too small
+  // would wrap around and be handed to readFixedString() as an enormous size.
+  const uint32_t non_qualifier_length =
+      row_length + column_family_length + kHBaseSizeOfKeyInfrastructure_;
+  if (key_length < non_qualifier_length) {
+    throw std::out_of_range(
+        "KeyValueCodec: key length is smaller than its row, family and fixed-size fields");
+  }
+  const uint32_t qualifier_length = key_length - non_qualifier_length;
+
+  std::string column_qualifier = cursor.readFixedString(qualifier_length);
+  const uint64_t timestamp = cursor.readBE<uint64_t>();
+  const uint8_t key_type = cursor.readBE<uint8_t>();
+  std::string value = cursor.readFixedString(value_length);
+
+  return std::make_shared<Cell>(row, column_family, column_qualifier, timestamp, value,
+                                static_cast<hbase::CellType>(key_type));
+}
+
+/**
+ * @brief Decodes the next cell of the block into current_cell_.
+ *
+ * Every call builds a fresh cursor over cell_block_, skips to offset_ + cur_pos_, reads the
+ * cell's size prefix and decodes the cell that follows it.
+ *
+ * @return true if a cell was decoded; false once the block is exhausted, and on every call
+ * after that
+ */
+bool KeyValueCodec::KVDecoder::Advance() {
+  if (end_of_cell_block_) {
+    return false;
+  }
+
+  // >= rather than ==: if a size prefix makes cur_pos_ overshoot length_, the scan must stop
+  // here instead of decoding bytes that lie beyond the cell block.
+  if (cur_pos_ >= length_) {
+    end_of_cell_block_ = true;
+    return false;
+  }
+
+  folly::io::Cursor cursor(cell_block_.get());
+  cursor.skip(offset_ + cur_pos_);
+  // The size prefix does not count itself, hence the extra kHBaseSizeOfInt_ below.
+  const uint32_t current_cell_size = cursor.readBE<uint32_t>();
+  current_cell_ = Decode(cursor);
+  cur_pos_ += kHBaseSizeOfInt_ + current_cell_size;
+  return true;
+}
+
+// Reports the cell block length that was given to the constructor.
+uint32_t KeyValueCodec::KVDecoder::CellBlockLength() const {
+  return length_;
+}
+} /* namespace hbase */
diff --git a/hbase-native-client/core/keyvalue-codec.h b/hbase-native-client/core/keyvalue-codec.h
new file mode 100644
index 0000000..fd58346
--- /dev/null
+++ b/hbase-native-client/core/keyvalue-codec.h
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include <folly/io/Cursor.h>
+#include <folly/io/IOBuf.h>
+#include <memory>
+
+#include "core/cell.h"
+#include "serde/codec.h"
+
+namespace hbase {
+
+/**
+ * @brief Codec for cell blocks, the counterpart of the Java class named by kJavaClassName.
+ *
+ * KeyValueCodec is a Codec. CreateEncoder() and CreateDecoder() return Encoder / Decoder
+ * instances. The Decoder is a CellScanner over the cells of a cell_block. The Encoder is a
+ * CellOutputStream whose Write() is not implemented yet.
+ * Usage:-
+ * 1) Cell Decoding:-
+ * KeyValueCodec codec;
+ * auto cell_scanner = codec.CreateDecoder(std::move(cell_block), cb_start_offset, cb_length);
+ * while (cell_scanner->Advance()) {
+ *   auto current_cell = cell_scanner->Current();
+ * }
+ */
+class KeyValueCodec : public Codec {
+ public:
+  /**
+   * Constructor
+   */
+  KeyValueCodec() {}
+
+  /** @brief returns a new encoder; see KVEncoder for what it currently supports */
+  std::unique_ptr<Codec::Encoder> CreateEncoder() override { return std::make_unique<KVEncoder>(); }
+
+  /**
+   * @brief returns a decoder that scans the cells of cell_block
+   * @param cell_block buffer holding the cells; ownership moves to the decoder
+   * @param offset byte offset of the first cell inside cell_block
+   * @param length number of bytes occupied by the cells
+   */
+  std::unique_ptr<Codec::Decoder> CreateDecoder(std::unique_ptr<folly::IOBuf> cell_block,
+                                                uint32_t offset, uint32_t length) override {
+    return std::make_unique<KVDecoder>(std::move(cell_block), offset, length);
+  }
+
+  /** @brief returns the java class name corresponding to this Codec implementation */
+  const char* java_class_name() const override { return kJavaClassName; }
+
+  static constexpr const char* kJavaClassName = "org.apache.hadoop.hbase.codec.KeyValueCodec";
+
+ private:
+  /** Encoder placeholder: both operations are currently no-ops. */
+  class KVEncoder : public Codec::Encoder {
+   public:
+    KVEncoder() {}
+
+    void Write(const Cell& cell) override {
+      // TODO: Encode Cells using KeyValueCodec wire format
+    }
+
+    void Flush() override {}
+  };
+
+  /** Decoder that walks a cell block one cell at a time. */
+  class KVDecoder : public Codec::Decoder {
+   public:
+    KVDecoder(std::unique_ptr<folly::IOBuf> cell_block, uint32_t cell_block_start_offset,
+              uint32_t cell_block_length);
+    ~KVDecoder();
+
+    /**
+     * @brief Overridden from CellScanner. This method parses cell_block and stores the current
+     * cell in current_cell_. The current cell can be obtained using cell_scanner.Current();
+     */
+    bool Advance() override;
+
+    /**
+     * @brief returns the current cell; nullptr until Advance() has decoded one
+     */
+    const std::shared_ptr<Cell> Current() const override { return current_cell_; }
+
+    /**
+     * @brief returns the length of the cell block that was passed to the constructor
+     */
+    uint32_t CellBlockLength() const;
+
+   private:
+    std::shared_ptr<Cell> Decode(folly::io::Cursor& cursor);
+
+    // The sizes below are compile-time constants shared by all decoders, so they are
+    // static constexpr rather than per-instance const members.
+
+    /**
+     * Size of boolean in bytes
+     */
+    static constexpr int kHBaseSizeOfBoolean_ = sizeof(uint8_t) / sizeof(uint8_t);
+
+    /**
+     * Size of byte in bytes
+     */
+    static constexpr uint8_t kHBaseSizeOfByte_ = kHBaseSizeOfBoolean_;
+
+    /**
+     * Size of int in bytes
+     */
+    static constexpr uint32_t kHBaseSizeOfInt_ = sizeof(uint32_t) / kHBaseSizeOfByte_;
+
+    /**
+     * Size of long in bytes
+     */
+    static constexpr uint64_t kHBaseSizeOfLong_ = sizeof(uint64_t) / kHBaseSizeOfByte_;
+
+    /**
+     * Size of Short in bytes
+     */
+    static constexpr uint16_t kHBaseSizeOfShort_ = sizeof(uint16_t) / kHBaseSizeOfByte_;
+
+    static constexpr uint32_t kHBaseSizeOfKeyLength_ = kHBaseSizeOfInt_;
+    static constexpr uint32_t kHBaseSizeOfValueLength_ = kHBaseSizeOfInt_;
+    static constexpr uint16_t kHBaseSizeOfRowLength_ = kHBaseSizeOfShort_;
+    static constexpr uint8_t kHBaseSizeOfFamilyLength_ = kHBaseSizeOfByte_;
+    static constexpr uint64_t kHBaseSizeOfTimestamp_ = kHBaseSizeOfLong_;
+    static constexpr uint8_t kHBaseSizeOfKeyType_ = kHBaseSizeOfByte_;
+    static constexpr uint32_t kHBaseSizeOfTimestampAndKey_ =
+        kHBaseSizeOfTimestamp_ + kHBaseSizeOfKeyType_;
+    /** Fixed-size part of a key: row length, family length, timestamp and key type fields */
+    static constexpr uint32_t kHBaseSizeOfKeyInfrastructure_ =
+        kHBaseSizeOfRowLength_ + kHBaseSizeOfFamilyLength_ + kHBaseSizeOfTimestampAndKey_;
+    /** Fixed-size part of a KeyValue: the key length and value length fields */
+    static constexpr uint32_t kHBaseSizeOfKeyValueInfrastructure_ =
+        kHBaseSizeOfKeyLength_ + kHBaseSizeOfValueLength_;
+
+    std::unique_ptr<folly::IOBuf> cell_block_ = nullptr;
+    uint32_t offset_ = 0;
+    uint32_t length_ = 0;
+    uint32_t cur_pos_ = 0;
+    bool end_of_cell_block_ = false;
+
+    std::shared_ptr<Cell> current_cell_ = nullptr;
+  };
+};
+
+} /* namespace hbase */
diff --git a/hbase-native-client/serde/BUCK b/hbase-native-client/serde/BUCK
index b5aad3d..c6809a8 100644
--- a/hbase-native-client/serde/BUCK
+++ b/hbase-native-client/serde/BUCK
@@ -18,6 +18,9 @@
cxx_library(
name="serde",
exported_headers=[
+ "cell-scanner.h",
+ "cell-outputstream.h",
+ "codec.h",
"region-info.h",
"rpc.h",
"server-name.h",
diff --git a/hbase-native-client/serde/cell-outputstream.h b/hbase-native-client/serde/cell-outputstream.h
new file mode 100644
index 0000000..963dd31
--- /dev/null
+++ b/hbase-native-client/serde/cell-outputstream.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+
+namespace hbase {
+
+class Cell;
+
+/**
+ * @brief Interface for a sink that Cells are written to.
+ */
+class CellOutputStream {
+ public:
+  virtual ~CellOutputStream() = default;
+
+  /**
+   * Writes one Cell. The implementation must copy the entire state of the Cell: if the caller
+   * modifies the Cell right after this method returns, the copy that was added by the write
+   * must not be affected in any way.
+   * @param cell Cell to write out
+   */
+  virtual void Write(const Cell& cell) = 0;
+
+  /**
+   * What flushing means is up to the implementation. Usually it means writing the accumulated
+   * data into a byte buffer that can then be read from the implementation to be sent to disk,
+   * put in the block cache, or sent over the network.
+   */
+  virtual void Flush() = 0;
+};
+
+} /* namespace hbase */
diff --git a/hbase-native-client/serde/cell-scanner.h b/hbase-native-client/serde/cell-scanner.h
new file mode 100644
index 0000000..fe4a249
--- /dev/null
+++ b/hbase-native-client/serde/cell-scanner.h
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+#include <folly/io/IOBuf.h>
+#include <memory>
+
+namespace hbase {
+
+class Cell;
+
+/**
+ * @brief Interface for stepping through a sequence of Cells, one Cell at a time
+ */
+class CellScanner {
+ public:
+  virtual ~CellScanner() = default;
+
+  /**
+   * @brief Moves the scanner to the next cell.
+   * Typical usage will be :-
+   * while (cell_scanner.Advance()) {
+   *   auto current_cell = cell_scanner.Current();
+   * }
+   */
+  virtual bool Advance() = 0;
+
+  /**
+   * @brief returns the cell the scanner is currently positioned on
+   */
+  virtual const std::shared_ptr<Cell> Current() const = 0;
+};
+
+} /* namespace hbase */
diff --git a/hbase-native-client/serde/codec.h b/hbase-native-client/serde/codec.h
new file mode 100644
index 0000000..64807dc
--- /dev/null
+++ b/hbase-native-client/serde/codec.h
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include <folly/io/IOBuf.h>
+#include <memory>
+
+#include "serde/cell-outputstream.h"
+#include "serde/cell-scanner.h"
+
+namespace hbase {
+
+/**
+ * @brief Factory interface for the Encoder / Decoder pair of one Cell wire format.
+ */
+class Codec {
+ public:
+  virtual ~Codec() = default;
+
+  /** Write side of a codec: a CellOutputStream. */
+  class Encoder : public CellOutputStream {};
+
+  /** Read side of a codec: a CellScanner. */
+  class Decoder : public CellScanner {};
+
+  /** @brief returns a new Encoder for this codec */
+  virtual std::unique_ptr<Encoder> CreateEncoder() = 0;
+
+  /**
+   * @brief returns a new Decoder over the given cell block
+   * @param cell_block buffer containing the cells, passed by unique_ptr
+   * @param cell_block_start_offset start offset of the cells inside cell_block
+   * @param cell_block_length length of the cell data
+   */
+  virtual std::unique_ptr<Decoder> CreateDecoder(std::unique_ptr<folly::IOBuf> cell_block,
+                                                 uint32_t cell_block_start_offset,
+                                                 uint32_t cell_block_length) = 0;
+
+  /** @brief returns the java class name corresponding to this Codec implementation */
+  virtual const char* java_class_name() const = 0;
+};
+
+} /* namespace hbase */