You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2021/03/12 01:05:40 UTC

[kudu] branch master updated: KUDU-2671: No-op ClientServerMapping if only one schema exists.

This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 8ebe032  KUDU-2671: No-op ClientServerMapping if only one schema exists.
8ebe032 is described below

commit 8ebe032847f618d76459b59c84b150c997016ee9
Author: Mahesh Reddy <mr...@cloudera.com>
AuthorDate: Tue Mar 2 00:45:12 2021 -0800

    KUDU-2671: No-op ClientServerMapping if only one schema exists.
    
    For the new field of PartitionSchema, a client schema is necessary
    for RowOperationsPBDecoder to populate the new field. Not all call
    sites for PartitionSchema::FromPB() have access to an explicit client
    schema.
    
    This patch allows the client schema to have column IDs if it's equal
    to the tablet schema and no-ops the ClientServerMapping in this case.
    If we don't have access to a client schema, we don't need a mapping
    since we only have one schema.
    
    Change-Id: I836f157201509a9b38a3ad42d653236f240dda5e
    Reviewed-on: http://gerrit.cloudera.org:8080/17161
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    Tested-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/common/row_operations-test.cc | 19 +++++++++++++++----
 src/kudu/common/row_operations.cc      | 32 ++++++++++++++++++++++----------
 src/kudu/common/row_operations.h       |  6 ++++++
 3 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/src/kudu/common/row_operations-test.cc b/src/kudu/common/row_operations-test.cc
index 34dccb7..d56b77b 100644
--- a/src/kudu/common/row_operations-test.cc
+++ b/src/kudu/common/row_operations-test.cc
@@ -33,8 +33,8 @@
 #include "kudu/common/partial_row.h"
 #include "kudu/common/row.h"
 #include "kudu/common/schema.h"
-#include "kudu/common/types.h"
 #include "kudu/common/wire_protocol.pb.h"
+#include "kudu/common/types.h"
 #include "kudu/gutil/basictypes.h"
 #include "kudu/gutil/dynamic_annotations.h"
 #include "kudu/gutil/macros.h"
@@ -757,7 +757,10 @@ TEST_F(RowOperationsTest, TestProjectDeletes) {
   }
 }
 
-TEST_F(RowOperationsTest, SplitKeyRoundTrip) {
+namespace {
+// If 'use_tablet_schema_as_client_schema' is true, the tablet schema with its
+// column IDs is passed in as the client_schema for the RowOperationsPBDecoder.
+void TestSplitKeys(bool use_tablet_schema_as_client_schema) {
   Schema client_schema = Schema({ ColumnSchema("int8", INT8),
                                   ColumnSchema("int16", INT16),
                                   ColumnSchema("int32", INT32),
@@ -787,7 +790,8 @@ TEST_F(RowOperationsTest, SplitKeyRoundTrip) {
   RowOperationsPBEncoder(&pb).Add(RowOperationsPB::SPLIT_ROW, row);
 
   Schema schema = client_schema.CopyWithColumnIds();
-  RowOperationsPBDecoder decoder(&pb, &client_schema, &schema, nullptr);
+  RowOperationsPBDecoder decoder(
+      &pb, use_tablet_schema_as_client_schema ? &schema : &client_schema, &schema, nullptr);
   vector<DecodedRowOperation> ops;
   ASSERT_OK(decoder.DecodeOperations<DecoderMode::SPLIT_ROWS>(&ops));
   ASSERT_EQ(1, ops.size());
@@ -821,7 +825,6 @@ TEST_F(RowOperationsTest, SplitKeyRoundTrip) {
   CHECK(!row2->IsColumnSet("missing"));
 }
 
-namespace {
 void CheckExceedCellLimit(const Schema& client_schema,
                           const vector<string>& col_values,
                           RowOperationsPB::Type op_type,
@@ -931,6 +934,14 @@ void CheckSplitExceedCellLimit(const Schema& client_schema,
 }
 } // anonymous namespace
 
+// Decodes a split row using RowOperationsPBDecoder with DecoderMode::SPLIT_ROWS under
+// two conditions, one where the client schema doesn't have column IDs as expected and
+// another where both the client schema and the tablet schema are the same object.
+TEST_F(RowOperationsTest, SplitKeysRoundTrip) {
+  TestSplitKeys(false);
+  TestSplitKeys(true);
+}
+
 TEST_F(RowOperationsTest, ExceedCellLimit) {
   Schema client_schema = Schema({ ColumnSchema("key_string", STRING),
                                   ColumnSchema("key_binary", BINARY),
diff --git a/src/kudu/common/row_operations.cc b/src/kudu/common/row_operations.cc
index 363a424..84cfbce 100644
--- a/src/kudu/common/row_operations.cc
+++ b/src/kudu/common/row_operations.cc
@@ -408,6 +408,14 @@ class ClientServerMapping {
   DISALLOW_COPY_AND_ASSIGN(ClientServerMapping);
 };
 
+size_t RowOperationsPBDecoder::GetTabletColIdx(const ClientServerMapping& mapping,
+                                               size_t client_col_idx) {
+  if (client_schema_ != tablet_schema_) {
+    return mapping.client_to_tablet_idx(client_col_idx);
+  }
+  return client_col_idx;
+}
+
 Status RowOperationsPBDecoder::DecodeInsertOrUpsert(const uint8_t* prototype_row_storage,
                                                     const ClientServerMapping& mapping,
                                                     DecodedRowOperation* op) {
@@ -446,7 +454,7 @@ Status RowOperationsPBDecoder::DecodeInsertOrUpsert(const uint8_t* prototype_row
     // Look up the corresponding column from the tablet. We use the server-side
     // ColumnSchema object since it has the most up-to-date default, nullability,
     // etc.
-    size_t tablet_col_idx = mapping.client_to_tablet_idx(client_col_idx);
+    size_t tablet_col_idx = GetTabletColIdx(mapping, client_col_idx);
     const ColumnSchema& col = tablet_schema_->column(tablet_col_idx);
 
     bool isset = BitmapTest(client_isset_map, client_col_idx);
@@ -515,8 +523,10 @@ Status RowOperationsPBDecoder::DecodeUpdateOrDelete(const ClientServerMapping& m
     // Look up the corresponding column from the tablet. We use the server-side
     // ColumnSchema object since it has the most up-to-date default, nullability,
     // etc.
-    DCHECK_EQ(mapping.client_to_tablet_idx(client_col_idx),
-              client_col_idx) << "key columns should match";
+    if (client_schema_ != tablet_schema_) {
+      DCHECK_EQ(mapping.client_to_tablet_idx(client_col_idx),
+                client_col_idx) << "key columns should match";
+    }
     size_t tablet_col_idx = client_col_idx;
 
     const ColumnSchema& col = tablet_schema_->column(tablet_col_idx);
@@ -550,7 +560,7 @@ Status RowOperationsPBDecoder::DecodeUpdateOrDelete(const ClientServerMapping& m
 
     // Now process the rest of columns as updates.
     for (; client_col_idx < client_schema_->num_columns(); client_col_idx++) {
-      size_t tablet_col_idx = mapping.client_to_tablet_idx(client_col_idx);
+      size_t tablet_col_idx = GetTabletColIdx(mapping, client_col_idx);
       const ColumnSchema& col = tablet_schema_->column(tablet_col_idx);
 
       if (BitmapTest(client_isset_map, client_col_idx)) {
@@ -592,7 +602,7 @@ Status RowOperationsPBDecoder::DecodeUpdateOrDelete(const ClientServerMapping& m
     // Ensure that no other columns are set.
     for (; client_col_idx < client_schema_->num_columns(); client_col_idx++) {
       if (PREDICT_FALSE(BitmapTest(client_isset_map, client_col_idx))) {
-        size_t tablet_col_idx = mapping.client_to_tablet_idx(client_col_idx);
+        size_t tablet_col_idx = GetTabletColIdx(mapping, client_col_idx);
         const ColumnSchema& col = tablet_schema_->column(tablet_col_idx);
         op->SetFailureStatusOnce(Status::InvalidArgument(
             "DELETE should not have a value for column", col.ToString()));
@@ -634,7 +644,7 @@ Status RowOperationsPBDecoder::DecodeSplitRow(const ClientServerMapping& mapping
     // Look up the corresponding column from the tablet. We use the server-side
     // ColumnSchema object since it has the most up-to-date default, nullability,
     // etc.
-    size_t tablet_col_idx = mapping.client_to_tablet_idx(client_col_idx);
+    size_t tablet_col_idx = GetTabletColIdx(mapping, client_col_idx);
     const ColumnSchema& col = tablet_schema_->column(tablet_col_idx);
 
     if (BitmapTest(client_isset_map, client_col_idx)) {
@@ -662,12 +672,14 @@ Status RowOperationsPBDecoder::DecodeOperations(vector<DecodedRowOperation>* ops
   // then drop the column, expecting it to be compatible, but all writes would
   // start failing until clients refreshed their schema.
   // See DISABLED_TestProjectUpdatesSubsetOfColumns
-  CHECK(!client_schema_->has_column_ids());
+  CHECK_EQ(client_schema_->has_column_ids(), client_schema_ == tablet_schema_);
   DCHECK(tablet_schema_->has_column_ids());
   ClientServerMapping mapping(client_schema_, tablet_schema_);
-  RETURN_NOT_OK(client_schema_->GetProjectionMapping(*tablet_schema_, &mapping));
-  DCHECK_EQ(mapping.num_mapped(), client_schema_->num_columns());
-  RETURN_NOT_OK(mapping.CheckAllRequiredColumnsPresent());
+  if (client_schema_ != tablet_schema_) {
+    RETURN_NOT_OK(client_schema_->GetProjectionMapping(*tablet_schema_, &mapping));
+    DCHECK_EQ(mapping.num_mapped(), client_schema_->num_columns());
+    RETURN_NOT_OK(mapping.CheckAllRequiredColumnsPresent());
+  }
 
   // Make a "prototype row" which has all the defaults filled in. We can copy
   // this to create a starting point for each row as we decode it, with
diff --git a/src/kudu/common/row_operations.h b/src/kudu/common/row_operations.h
index 16b7ba4..0717603 100644
--- a/src/kudu/common/row_operations.h
+++ b/src/kudu/common/row_operations.h
@@ -137,6 +137,9 @@ class RowOperationsPBDecoder {
   // columns and rows.
   Status ReadColumnAndDiscard(const ColumnSchema& col);
   bool HasNext() const;
+  // Returns 'client_col_idx' if client and tablet schemas match, if not uses
+  // 'mapping' to translate client schema idx to tablet schema idx.
+  size_t GetTabletColIdx(const ClientServerMapping& mapping, size_t client_col_idx);
 
   Status DecodeInsertOrUpsert(const uint8_t* prototype_row_storage,
                               const ClientServerMapping& mapping,
@@ -160,6 +163,9 @@ class RowOperationsPBDecoder {
                   const ClientServerMapping& mapping, DecodedRowOperation* op);
 
   const RowOperationsPB* const pb_;
+  // If 'client_schema_' and 'tablet_schema_' are the same object, the mapping
+  // between the two schemas is effectively useless since only one schema
+  // exists. This is the only time when the client_schema_ can have column IDs.
   const Schema* const client_schema_;
   const Schema* const tablet_schema_;
   Arena* const dst_arena_;