You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zg...@apache.org on 2019/03/12 12:45:49 UTC

[hbase] 61/133: HBASE-17278 [C++] Cell Scanner and KeyValueCodec for encoding cells in RPC (Sudeep Sunthankar and enis)

This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch HBASE-14850
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 9a99925333c63215ef1c70948ce99dc06c8503d3
Author: Enis Soztutar <en...@apache.org>
AuthorDate: Thu Feb 9 11:26:47 2017 -0800

    HBASE-17278 [C++] Cell Scanner and KeyValueCodec for encoding cells in RPC (Sudeep Sunthankar and enis)
---
 hbase-native-client/.gitignore                |   3 +-
 hbase-native-client/core/BUCK                 |   2 +
 hbase-native-client/core/keyvalue-codec.cc    |  69 ++++++++++++
 hbase-native-client/core/keyvalue-codec.h     | 147 ++++++++++++++++++++++++++
 hbase-native-client/serde/BUCK                |   3 +
 hbase-native-client/serde/cell-outputstream.h |  53 ++++++++++
 hbase-native-client/serde/cell-scanner.h      |  50 +++++++++
 hbase-native-client/serde/codec.h             |  50 +++++++++
 8 files changed, 376 insertions(+), 1 deletion(-)

diff --git a/hbase-native-client/.gitignore b/hbase-native-client/.gitignore
index b172fe9..c0032e4 100644
--- a/hbase-native-client/.gitignore
+++ b/hbase-native-client/.gitignore
@@ -22,4 +22,5 @@ buck-out
 *.swp
 
 # Thirdparty dirs
-third-party/googletest*
+third-party/*
+/gcc-debug/
diff --git a/hbase-native-client/core/BUCK b/hbase-native-client/core/BUCK
index f1880a4..d8d15a9 100644
--- a/hbase-native-client/core/BUCK
+++ b/hbase-native-client/core/BUCK
@@ -22,6 +22,7 @@ cxx_library(
         "client.h",
         "cell.h",
         "hbase_macros.h",
+        "keyvalue-codec.h",
         "region-location.h",
         "location-cache.h",
         # TODO: move this out of exported
@@ -40,6 +41,7 @@ cxx_library(
     srcs=[
         "cell.cc",
         "client.cc",
+        "keyvalue-codec.cc",
         "location-cache.cc",
         "meta-utils.cc",
         "get.cc",
diff --git a/hbase-native-client/core/keyvalue-codec.cc b/hbase-native-client/core/keyvalue-codec.cc
new file mode 100644
index 0000000..1b526b9
--- /dev/null
+++ b/hbase-native-client/core/keyvalue-codec.cc
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "core/keyvalue-codec.h"
+
+#include <string>
+
+namespace hbase {
+
+KeyValueCodec::KVDecoder::KVDecoder(std::unique_ptr<folly::IOBuf> cell_block, uint32_t offset,
+                                    uint32_t length)
+    : cell_block_(std::move(cell_block)), offset_(offset), length_(length) {}
+
+KeyValueCodec::KVDecoder::~KVDecoder() {}
+
+std::shared_ptr<Cell> KeyValueCodec::KVDecoder::Decode(folly::io::Cursor &cursor) {
+  uint32_t key_length = cursor.readBE<uint32_t>();    // 4-byte big-endian length of the key part
+  uint32_t value_length = cursor.readBE<uint32_t>();  // 4-byte big-endian length of the value
+  uint16_t row_length = cursor.readBE<uint16_t>();    // 2-byte row length precedes the row bytes
+  std::string row = cursor.readFixedString(row_length);
+  uint8_t column_family_length = cursor.readBE<uint8_t>();  // 1-byte family length
+  std::string column_family = cursor.readFixedString(column_family_length);
+  int qualifier_length =  // what is left of the key after row, family and the fixed-size fields
+      key_length - (row_length + column_family_length + kHBaseSizeOfKeyInfrastructure_);
+  std::string column_qualifier = cursor.readFixedString(qualifier_length);
+  uint64_t timestamp = cursor.readBE<uint64_t>();  // 8-byte timestamp follows the qualifier
+  uint8_t key_type = cursor.readBE<uint8_t>();     // 1-byte key type, cast to CellType below
+  std::string value = cursor.readFixedString(value_length);
+
+  return std::make_shared<Cell>(row, column_family, column_qualifier, timestamp, value,
+                                static_cast<hbase::CellType>(key_type));
+}
+
+bool KeyValueCodec::KVDecoder::Advance() {
+  if (end_of_cell_block_) {  // a previous call already reached the end
+    return false;
+  }
+
+  if (cur_pos_ == length_) {  // NOTE(review): exact match only; an overshoot is not detected
+    end_of_cell_block_ = true;
+    return false;
+  }
+
+  folly::io::Cursor cursor(cell_block_.get());  // fresh cursor over the buffer on every call
+  cursor.skip(offset_ + cur_pos_);              // jump to the start of the next cell
+  uint32_t current_cell_size = cursor.readBE<uint32_t>();  // 4-byte size prefix of this cell
+  current_cell_ = Decode(cursor);
+  cur_pos_ += kHBaseSizeOfInt_ + current_cell_size;  // step over the size prefix and the cell
+  return true;
+}
+
+uint32_t KeyValueCodec::KVDecoder::CellBlockLength() const { return length_; }
+} /* namespace hbase */
diff --git a/hbase-native-client/core/keyvalue-codec.h b/hbase-native-client/core/keyvalue-codec.h
new file mode 100644
index 0000000..fd58346
--- /dev/null
+++ b/hbase-native-client/core/keyvalue-codec.h
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include <folly/io/Cursor.h>
+#include <folly/io/IOBuf.h>
+#include <memory>
+
+#include "core/cell.h"
+#include "serde/codec.h"
+
+namespace hbase {
+
+/**
+ * @brief Parses a sequence of Cells, based on org.apache.hadoop.hbase.codec.KeyValueCodec.java
+ *
+ * KeyValueCodec implements the Codec interface. A sequence of cells is obtained from cell_block.
+ * The public CreateEncoder and CreateDecoder methods return Encoder/Decoder instances; the
+ * Decoder is a CellScanner used to obtain the individual cells in cell_block.
+ * Usage:-
+ * 1) Cell Decoding:-
+ * unique_ptr<CellScanner> cell_scanner = KeyValueCodec().CreateDecoder(std::move(cell_block),
+ *    cb_start_offset, cb_length);
+ * while (cell_scanner->Advance()) {
+ *  auto current_cell = cell_scanner->Current();
+ * }
+ */
+class KeyValueCodec : public Codec {
+ public:
+  /**
+   * Constructor
+   */
+  KeyValueCodec() {}
+
+  std::unique_ptr<Codec::Encoder> CreateEncoder() override { return std::make_unique<KVEncoder>(); }
+  std::unique_ptr<Codec::Decoder> CreateDecoder(std::unique_ptr<folly::IOBuf> cell_block,
+                                                uint32_t offset, uint32_t length) override {
+    return std::make_unique<KVDecoder>(std::move(cell_block), offset, length);
+  }
+
+  /** @brief returns the java class name corresponding to this Codec implementation */
+  virtual const char* java_class_name() const override { return kJavaClassName; }
+
+  static constexpr const char* kJavaClassName = "org.apache.hadoop.hbase.codec.KeyValueCodec";
+
+ private:
+  class KVEncoder : public Codec::Encoder {
+   public:
+    KVEncoder() {}
+
+    void Write(const Cell& cell) {
+      // TODO: Encode Cells using KeyValueCodec wire format
+    }
+
+    void Flush() {}
+  };
+
+  class KVDecoder : public Codec::Decoder {
+   public:
+    KVDecoder(std::unique_ptr<folly::IOBuf> cell_block, uint32_t cell_block_start_offset,
+              uint32_t cell_block_length);
+    ~KVDecoder();
+
+    /**
+     * @brief Overridden from CellScanner. Parses the next cell from cell_block and stores it in
+     * current_cell_. The current cell can be obtained using cell_scanner.Current();
+     */
+    bool Advance();
+
+    /**
+     * @brief returns the current cell
+     */
+    const std::shared_ptr<Cell> Current() const { return current_cell_; }
+
+    /**
+     * @brief returns the total length of cell_block, as passed to the constructor
+     */
+    uint32_t CellBlockLength() const;
+
+   private:
+    std::shared_ptr<Cell> Decode(folly::io::Cursor& cursor);
+
+    /**
+     * Size of boolean in bytes
+     */
+    const int kHBaseSizeOfBoolean_ = sizeof(uint8_t) / sizeof(uint8_t);
+
+    /**
+     * Size of byte in bytes
+     */
+    const uint8_t kHBaseSizeOfByte_ = kHBaseSizeOfBoolean_;
+
+    /**
+     * Size of int in bytes
+     */
+    const uint32_t kHBaseSizeOfInt_ = sizeof(uint32_t) / kHBaseSizeOfByte_;
+
+    /**
+     * Size of long in bytes
+     */
+    const uint64_t kHBaseSizeOfLong_ = sizeof(uint64_t) / kHBaseSizeOfByte_;
+
+    /**
+     * Size of Short in bytes
+     */
+    const uint16_t kHBaseSizeOfShort_ = sizeof(uint16_t) / kHBaseSizeOfByte_;
+
+    const uint32_t kHBaseSizeOfKeyLength_ = kHBaseSizeOfInt_;
+    const uint32_t kHBaseSizeOfValueLength_ = kHBaseSizeOfInt_;
+    const uint16_t kHBaseSizeOfRowLength_ = kHBaseSizeOfShort_;
+    const uint8_t kHBaseSizeOfFamilyLength_ = kHBaseSizeOfByte_;
+    const uint64_t kHBaseSizeOfTimestamp_ = kHBaseSizeOfLong_;
+    const uint8_t kHBaseSizeOfKeyType_ = kHBaseSizeOfByte_;
+    const uint32_t kHBaseSizeOfTimestampAndKey_ = kHBaseSizeOfTimestamp_ + kHBaseSizeOfKeyType_;
+    const uint32_t kHBaseSizeOfKeyInfrastructure_ =
+        kHBaseSizeOfRowLength_ + kHBaseSizeOfFamilyLength_ + kHBaseSizeOfTimestampAndKey_;
+    const uint32_t kHBaseSizeOfKeyValueInfrastructure_ =
+        kHBaseSizeOfKeyLength_ + kHBaseSizeOfValueLength_;
+
+    std::unique_ptr<folly::IOBuf> cell_block_ = nullptr;
+    uint32_t offset_ = 0;
+    uint32_t length_ = 0;
+    uint32_t cur_pos_ = 0;
+    bool end_of_cell_block_ = false;
+
+    std::shared_ptr<Cell> current_cell_ = nullptr;
+  };
+};
+
+} /* namespace hbase */
diff --git a/hbase-native-client/serde/BUCK b/hbase-native-client/serde/BUCK
index b5aad3d..c6809a8 100644
--- a/hbase-native-client/serde/BUCK
+++ b/hbase-native-client/serde/BUCK
@@ -18,6 +18,9 @@
 cxx_library(
     name="serde",
     exported_headers=[
+        "cell-scanner.h",
+        "cell-outputstream.h",
+        "codec.h",
         "region-info.h",
         "rpc.h",
         "server-name.h",
diff --git a/hbase-native-client/serde/cell-outputstream.h b/hbase-native-client/serde/cell-outputstream.h
new file mode 100644
index 0000000..963dd31
--- /dev/null
+++ b/hbase-native-client/serde/cell-outputstream.h
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+
+namespace hbase {
+
+class Cell;
+
+/**
+ * @brief Interface for writing out a sequence of Cells.
+ */
+class CellOutputStream {
+ public:
+  virtual ~CellOutputStream() {}
+
+  /**
+   * Implementation must copy the entire state of the Cell. If the written Cell is modified
+   * immediately after the write method returns, the modifications must have absolutely no effect
+   * on the copy of the Cell that was added in the write.
+   * @param cell Cell to write out
+   * @throws IOException
+   */
+  virtual void Write(const Cell& cell) = 0;
+
+  /**
+   * Let the implementation decide what to do.  Usually means writing accumulated data into a
+   * byte[] that can then be read from the implementation to be sent to disk, put in the block
+   * cache, or sent over the network.
+   * @throws IOException
+   */
+  virtual void Flush() = 0;
+};
+
+} /* namespace hbase */
diff --git a/hbase-native-client/serde/cell-scanner.h b/hbase-native-client/serde/cell-scanner.h
new file mode 100644
index 0000000..fe4a249
--- /dev/null
+++ b/hbase-native-client/serde/cell-scanner.h
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+#include <folly/io/IOBuf.h>
+#include <memory>
+
+namespace hbase {
+
+class Cell;
+
+/**
+ * @brief Interface for iterating over a sequence of Cells
+ */
+class CellScanner {
+ public:
+  virtual ~CellScanner() {}
+
+  /**
+   * @brief This method will be used to iterate the cells.
+   * Typical usage will be :-
+   * while(cell_scanner.Advance()){
+   *  auto current_cell = cell_scanner.Current();
+   * }
+   */
+  virtual bool Advance() = 0;
+
+  /**
+   * @brief returns the current cell
+   */
+  virtual const std::shared_ptr<Cell> Current() const = 0;
+};
+
+} /* namespace hbase */
diff --git a/hbase-native-client/serde/codec.h b/hbase-native-client/serde/codec.h
new file mode 100644
index 0000000..64807dc
--- /dev/null
+++ b/hbase-native-client/serde/codec.h
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include <folly/io/IOBuf.h>
+#include <memory>
+
+#include "serde/cell-outputstream.h"
+#include "serde/cell-scanner.h"
+
+namespace hbase {
+
+/**
+ * @brief Encoder / Decoder for Cells.
+ */
+class Codec {
+ public:
+  virtual ~Codec() {}
+
+  class Encoder : public CellOutputStream {};
+
+  class Decoder : public CellScanner {};
+
+  virtual std::unique_ptr<Encoder> CreateEncoder() = 0;
+  virtual std::unique_ptr<Decoder> CreateDecoder(std::unique_ptr<folly::IOBuf> cell_block,
+                                                 uint32_t cell_block_start_offset,
+                                                 uint32_t cell_block_length) = 0;
+
+  /** @brief returns the java class name corresponding to this Codec implementation */
+  virtual const char* java_class_name() const = 0;
+};
+
+} /* namespace hbase */