You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by dr...@apache.org on 2017/04/13 17:26:37 UTC

kudu git commit: Allow to get the raw data from a KuduScanBatch

Repository: kudu
Updated Branches:
  refs/heads/master 642e01190 -> b204477a4


Allow to get the raw data from a KuduScanBatch

This allows callers to fetch both the direct and the indirect raw data
from a KuduScanBatch. Exposing this opens the door for Impala to do a
whole-batch memcpy instead of copying row by row. Ideally there would be
no memcpy at all; follow-up patches will allow for that.

Change-Id: I3c9ad5aa7c5f45a87827352597a404241912342f
Reviewed-on: http://gerrit.cloudera.org:8080/6574
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: David Ribeiro Alves <dr...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b204477a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b204477a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b204477a

Branch: refs/heads/master
Commit: b204477a41542291caecb35c23e4a51da7386157
Parents: 642e011
Author: David Alves <dr...@apache.org>
Authored: Thu Apr 6 16:02:29 2017 -0700
Committer: David Ribeiro Alves <dr...@apache.org>
Committed: Thu Apr 13 05:53:50 2017 +0000

----------------------------------------------------------------------
 src/kudu/client/client-test.cc | 49 +++++++++++++++++++++++++++++++++++++
 src/kudu/client/scan_batch.cc  |  8 ++++++
 src/kudu/client/scan_batch.h   | 22 +++++++++++++++++
 src/kudu/client/schema.h       |  2 ++
 src/kudu/common/schema.h       |  5 ++--
 src/kudu/rpc/rpc-test-base.h   |  1 -
 6 files changed, 84 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b204477a/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 1fdd75d..4750bbb 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -368,9 +368,52 @@ class ClientTest : public KuduTest {
     }
   }
 
+  // Compares rows as obtained through a KuduScanBatch::RowPtr and through
+  // the raw direct and indirect data blocks exposed by KuduScanBatch,
+  // asserting that they are the same.
+  void AssertRawDataMatches(const KuduSchema& projection_schema,
+                            const KuduScanBatch& batch,
+                            const KuduScanBatch::RowPtr& row,
+                            int row_idx,
+                            int num_projected_cols) {
+
+    const Schema& schema = *projection_schema.schema_;
+    size_t row_stride = ContiguousRowHelper::row_size(schema);
+    const uint8_t* row_data = batch.direct_data().data() + row_idx * row_stride;
+
+    int32_t raw_key_val = *reinterpret_cast<const int32_t*>(
+        ContiguousRowHelper::cell_ptr(schema, row_data, 0));
+    int key_val;
+    ASSERT_OK(row.GetInt32(0, &key_val));
+    EXPECT_EQ(key_val, raw_key_val);
+
+    // Test projections have either 1 or 4 columns.
+    if (num_projected_cols == 1) return;
+    ASSERT_EQ(4, num_projected_cols);
+
+    int32_t raw_int_col_val = *reinterpret_cast<const int32_t*>(
+        ContiguousRowHelper::cell_ptr(schema, row_data, 1));
+    int int_col_val;
+    ASSERT_OK(row.GetInt32(1, &int_col_val));
+    EXPECT_EQ(int_col_val, raw_int_col_val);
+
+    Slice raw_nullable_slice_col_val = *reinterpret_cast<const Slice*>(
+        DCHECK_NOTNULL(ContiguousRowHelper::nullable_cell_ptr(schema, row_data, 2)));
+    Slice nullable_slice_col_val;
+    ASSERT_OK(row.GetString(2, &nullable_slice_col_val));
+    EXPECT_EQ(nullable_slice_col_val, raw_nullable_slice_col_val);
+
+    int32_t raw_col_val = *reinterpret_cast<const int32_t*>(
+        ContiguousRowHelper::cell_ptr(schema, row_data, 3));
+    int col_val;
+    ASSERT_OK(row.GetInt32(3, &col_val));
+    EXPECT_EQ(col_val, raw_col_val);
+  }
+
   void DoTestScanWithoutPredicates() {
     KuduScanner scanner(client_table_.get());
     ASSERT_OK(scanner.SetProjectedColumns({ "key" }));
+
     LOG_TIMING(INFO, "Scanning with no predicates") {
       ASSERT_OK(scanner.Open());
 
@@ -379,10 +422,13 @@ class ClientTest : public KuduTest {
       uint64_t sum = 0;
       while (scanner.HasMoreRows()) {
         ASSERT_OK(scanner.NextBatch(&batch));
+        int count = 0;
         for (const KuduScanBatch::RowPtr& row : batch) {
           int32_t value;
           ASSERT_OK(row.GetInt32(0, &value));
           sum += value;
+          AssertRawDataMatches(
+              scanner.GetProjectionSchema(), batch, row, count++, 1 /* num projected cols */);
         }
       }
       // The sum should be the sum of the arithmetic series from
@@ -409,12 +455,15 @@ class ClientTest : public KuduTest {
       KuduScanBatch batch;
       while (scanner.HasMoreRows()) {
         ASSERT_OK(scanner.NextBatch(&batch));
+        int count = 0;
         for (const KuduScanBatch::RowPtr& row : batch) {
           Slice s;
           ASSERT_OK(row.GetString(2, &s));
           if (!s.starts_with("hello 2") && !s.starts_with("hello 3")) {
             FAIL() << row.ToString();
           }
+          AssertRawDataMatches(
+              scanner.GetProjectionSchema(), batch, row, count++, 4 /* num projected cols */);
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/kudu/blob/b204477a/src/kudu/client/scan_batch.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/scan_batch.cc b/src/kudu/client/scan_batch.cc
index fb9a538..4f048b3 100644
--- a/src/kudu/client/scan_batch.cc
+++ b/src/kudu/client/scan_batch.cc
@@ -54,6 +54,14 @@ const KuduSchema* KuduScanBatch::projection_schema() const {
   return data_->client_projection_;
 }
 
+Slice KuduScanBatch::direct_data() const {
+  return data_->direct_data_;
+}
+
+Slice KuduScanBatch::indirect_data() const {
+  return data_->indirect_data_;
+}
+
 ////////////////////////////////////////////////////////////
 // KuduScanBatch::RowPtr
 ////////////////////////////////////////////////////////////

http://git-wip-us.apache.org/repos/asf/kudu/blob/b204477a/src/kudu/client/scan_batch.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/scan_batch.h b/src/kudu/client/scan_batch.h
index b4d91f6..987bbf2 100644
--- a/src/kudu/client/scan_batch.h
+++ b/src/kudu/client/scan_batch.h
@@ -116,6 +116,28 @@ class KUDU_EXPORT KuduScanBatch {
   ///   to have this schema.
   const KuduSchema* projection_schema() const;
 
+  /// @name Advanced/Unstable API
+  ///
+  /// There are no guarantees on the stability of the format returned
+  /// by these methods, which might change at any given time.
+  ///
+  /// @note The Slices returned by both direct_data() and indirect_data()
+  ///   are only valid for the lifetime of the KuduScanBatch.
+  //
+  ///@{
+  /// Return a slice that points to the direct row data received from the
+  /// server. Users of this API must have knowledge of the data format in
+  /// order to decode the data.
+  ///
+  /// @return a Slice that points to the raw direct row data.
+  Slice direct_data() const;
+
+  /// Like the method above, but for indirect data.
+  ///
+  /// @return a Slice that points to the raw indirect row data.
+  Slice indirect_data() const;
+  ///@}
+
  private:
   class KUDU_NO_EXPORT Data;
   friend class KuduScanner;

http://git-wip-us.apache.org/repos/asf/kudu/blob/b204477a/src/kudu/client/schema.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/schema.h b/src/kudu/client/schema.h
index 5bdba4d..45f7c9f 100644
--- a/src/kudu/client/schema.h
+++ b/src/kudu/client/schema.h
@@ -37,6 +37,7 @@ class ReplicaDumper;
 }
 
 namespace client {
+class ClientTest;
 
 namespace internal {
 class GetTableSchemaRpc;
@@ -484,6 +485,7 @@ class KUDU_EXPORT KuduSchema {
   KuduPartialRow* NewRow() const;
 
  private:
+  friend class ClientTest;
   friend class KuduClient;
   friend class KuduScanner;
   friend class KuduScanToken;

http://git-wip-us.apache.org/repos/asf/kudu/blob/b204477a/src/kudu/common/schema.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index 453e470..e794e11 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -391,9 +391,10 @@ class Schema {
                const vector<ColumnId>& ids,
                int key_columns);
 
-  // Return the number of bytes needed to represent a single row of this schema.
+  // Return the number of bytes needed to represent a single row of this schema, without
+  // accounting for the null bitmap if the Schema contains nullable values.
   //
-  // This size does not include any indirected (variable length) data (eg strings)
+  // This size does not include any indirected (variable length) data (eg strings).
   size_t byte_size() const {
     DCHECK(initialized());
     return col_offsets_.back();

http://git-wip-us.apache.org/repos/asf/kudu/blob/b204477a/src/kudu/rpc/rpc-test-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/rpc/rpc-test-base.h b/src/kudu/rpc/rpc-test-base.h
index 0eef8be..1652992 100644
--- a/src/kudu/rpc/rpc-test-base.h
+++ b/src/kudu/rpc/rpc-test-base.h
@@ -448,7 +448,6 @@ class RpcTestBase : public KuduTest {
 
     Slice first = GetSidecarPointer(controller, resp.sidecar1(), size1);
     Slice second = GetSidecarPointer(controller, resp.sidecar2(), size2);
-
     Random rng(kSeed);
     faststring expected;