You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2019/02/28 01:13:20 UTC

[kudu] branch master updated (1aae078 -> 8f85019)

This is an automated email from the ASF dual-hosted git repository.

mpercy pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 1aae078  [docker] Fix build to work outside of Git WD
     new cfc7b9d  Schema copy and move constructors/operators must copy max_col_id_
     new e3da951  schema: Fix ColumnSchema::Equals() usage
     new 7cb582c  KUDU-2645: schema: Add support for virtual columns in projections
     new 8f85019  KUDU-2645: schema: Validate that defaults are provided for virtual columns

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/client/client-unittest.cc       |  19 +++++
 src/kudu/client/schema.cc                |   2 +-
 src/kudu/client/schema.h                 |   5 ++
 src/kudu/common/column_predicate-test.cc |  48 +++++++++++++
 src/kudu/common/column_predicate.cc      |   4 +-
 src/kudu/common/column_predicate.h       |   9 ++-
 src/kudu/common/schema-test.cc           | 117 +++++++++++++++++++++++++------
 src/kudu/common/schema.cc                |  29 +++++++-
 src/kudu/common/schema.h                 |  39 ++++++++---
 9 files changed, 233 insertions(+), 39 deletions(-)


[kudu] 03/04: KUDU-2645: schema: Add support for virtual columns in projections

Posted by mp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mpercy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 7cb582ccbf6a87de3a1f25ae5e5b9776a338f183
Author: Mike Percy <mp...@apache.org>
AuthorDate: Fri Dec 21 10:58:12 2018 -0800

    KUDU-2645: schema: Add support for virtual columns in projections
    
    * Do not validate that virtual columns appearing in a user projection
      also appear in the tablet schema because virtual columns are not
      physically present and so should never appear in a tablet schema.
    * Generate artificial column ids for virtual columns so that we can map
      the virtual columns onto a scanner schema that includes column ids,
      like a tablet iterator. Schemas have the invariant that either there
      are no column ids present in a schema or all columns in the schema are
      mapped to a column id.
    
    This patch adds a simple unit test for Schema::GetMappedReadProjection()
    that includes the use of a virtual column.
    
    Change-Id: I06a69ab9d65245e25b1a955765a3a1ebdb9655d3
    Reviewed-on: http://gerrit.cloudera.org:8080/12204
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/common/schema-test.cc | 32 ++++++++++++++++++++++++++++++++
 src/kudu/common/schema.cc      | 15 ++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/kudu/common/schema-test.cc b/src/kudu/common/schema-test.cc
index 69f70ca..a66323e 100644
--- a/src/kudu/common/schema-test.cc
+++ b/src/kudu/common/schema-test.cc
@@ -440,6 +440,38 @@ TEST_F(TestSchema, TestProjectRename) {
   ASSERT_EQ(row_projector.projection_defaults()[0], 2);      // non_present schema2
 }
 
+// Test that we can map a projection schema (no column ids) onto a tablet
+// schema (column ids).
+TEST_F(TestSchema, TestGetMappedReadProjection) {
+  Schema tablet_schema({ ColumnSchema("key", STRING),
+                         ColumnSchema("val", INT32) },
+                       { ColumnId(0),
+                         ColumnId(1) },
+                       1);
+  Schema projection({ ColumnSchema("key", STRING),
+                      ColumnSchema("deleted", IS_DELETED) },
+                    1);
+
+  Schema mapped;
+  ASSERT_OK(tablet_schema.GetMappedReadProjection(projection, &mapped));
+  ASSERT_EQ(1, mapped.num_key_columns());
+  ASSERT_EQ(2, mapped.num_columns());
+  ASSERT_TRUE(mapped.has_column_ids());
+  ASSERT_FALSE(mapped.Equals(projection, Schema::COMPARE_ALL));
+
+  // The column id for the 'key' column in the mapped projection should match
+  // the one from the tablet schema.
+  ASSERT_EQ("key", mapped.column(0).name());
+  ASSERT_EQ(0, mapped.column_id(0));
+
+  // Since 'deleted' is a virtual column and thus does not appear in the tablet
+  // schema, in the mapped schema it should have been assigned a higher column
+  // id than the highest column id in the tablet schema.
+  ASSERT_EQ("deleted", mapped.column(1).name());
+  ASSERT_GT(mapped.column_id(1), tablet_schema.column_id(1));
+  ASSERT_GT(mapped.max_col_id(), tablet_schema.max_col_id());
+}
+
 // Test that the schema can be used to compare and stringify rows.
 TEST_F(TestSchema, TestRowOperations) {
   Schema schema({ ColumnSchema("col1", STRING),
diff --git a/src/kudu/common/schema.cc b/src/kudu/common/schema.cc
index 3ce4fb9..a035fda 100644
--- a/src/kudu/common/schema.cc
+++ b/src/kudu/common/schema.cc
@@ -356,11 +356,16 @@ Status Schema::VerifyProjectionCompatibility(const Schema& projection) const {
 
   vector<string> missing_columns;
   for (const ColumnSchema& pcol : projection.columns()) {
+    if (pcol.type_info()->is_virtual()) {
+      // Virtual columns may appear in a projection without appearing in the
+      // schema being projected onto.
+      continue;
+    }
     int index = find_column(pcol.name());
     if (index < 0) {
       missing_columns.push_back(pcol.name());
     } else if (!pcol.EqualsType(cols_[index])) {
-      // TODO: We don't support query with type adaptors yet
+      // TODO(matteo): We don't support query with type adaptors yet.
       return Status::InvalidArgument("The column '" + pcol.name() + "' must have type " +
                                      cols_[index].TypeToString() + " found " + pcol.TypeToString());
     }
@@ -391,8 +396,16 @@ Status Schema::GetMappedReadProjection(const Schema& projection,
   mapped_cols.reserve(projection.num_columns());
   mapped_ids.reserve(projection.num_columns());
 
+  int32_t proj_max_col_id = max_col_id_;
   for (const ColumnSchema& col : projection.columns()) {
     int index = find_column(col.name());
+    if (col.type_info()->is_virtual()) {
+      DCHECK_EQ(kColumnNotFound, index) << "virtual column not expected in tablet schema";
+      mapped_cols.push_back(col);
+      // Generate a "fake" column id for virtual columns.
+      mapped_ids.emplace_back(++proj_max_col_id);
+      continue;
+    }
     DCHECK_GE(index, 0) << col.name();
     mapped_cols.push_back(cols_[index]);
     mapped_ids.push_back(col_ids_[index]);


[kudu] 01/04: Schema copy and move constructors/operators must copy max_col_id_

Posted by mp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mpercy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit cfc7b9d45ada5b3f36be045ec0dfa81c16b2cc99
Author: Mike Percy <mp...@apache.org>
AuthorDate: Fri Feb 8 00:16:43 2019 -0800

    Schema copy and move constructors/operators must copy max_col_id_
    
    The CopyFrom() helper method used by the Schema copy constructor and
    assignment operator, as well as the move constructor and move operator,
    previously did not copy the value of max_col_id_, leaving it
    uninitialized in the copy. This patch addresses that oversight.
    
    This patch also updates the Schema::Equals() method to provide the
    option (not used by default) to check that the column ids and max column
    id match between the compared schemas, allowing us to provide coverage
    by beefing up an existing unit test.
    
    Change-Id: I61b8df1142dc733c446a5717ca8cae1309f3e867
    Reviewed-on: http://gerrit.cloudera.org:8080/12415
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
---
 src/kudu/common/schema-test.cc | 60 ++++++++++++++++++++++++++++--------------
 src/kudu/common/schema.cc      |  5 +++-
 src/kudu/common/schema.h       | 30 ++++++++++++++++-----
 3 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/src/kudu/common/schema-test.cc b/src/kudu/common/schema-test.cc
index 198bf00..a35e3be 100644
--- a/src/kudu/common/schema-test.cc
+++ b/src/kudu/common/schema-test.cc
@@ -121,7 +121,18 @@ TEST_F(TestSchema, TestSchemaToStringMode) {
             schema.ToString(Schema::ToStringMode::BASE_INFO));
 }
 
-TEST_F(TestSchema, TestCopyAndMove) {
+enum IncludeColumnIds {
+  INCLUDE_COL_IDS,
+  NO_COL_IDS
+};
+
+class ParameterizedSchemaTest : public KuduTest,
+                                public ::testing::WithParamInterface<IncludeColumnIds> {};
+
+INSTANTIATE_TEST_CASE_P(SchemaTypes, ParameterizedSchemaTest,
+                        ::testing::Values(INCLUDE_COL_IDS, NO_COL_IDS));
+
+TEST_P(ParameterizedSchemaTest, TestCopyAndMove) {
   auto check_schema = [](const Schema& schema) {
     ASSERT_EQ(sizeof(Slice) + sizeof(uint32_t) + sizeof(int32_t),
               schema.byte_size());
@@ -129,12 +140,15 @@ TEST_F(TestSchema, TestCopyAndMove) {
     ASSERT_EQ(0, schema.column_offset(0));
     ASSERT_EQ(sizeof(Slice), schema.column_offset(1));
 
-    EXPECT_EQ("(\n"
-              "    key STRING NOT NULL,\n"
-              "    uint32val UINT32 NULLABLE,\n"
-              "    int32val INT32 NOT NULL,\n"
-              "    PRIMARY KEY (key)\n"
-              ")",
+    EXPECT_EQ(Substitute("(\n"
+                         "    $0key STRING NOT NULL,\n"
+                         "    $1uint32val UINT32 NULLABLE,\n"
+                         "    $2int32val INT32 NOT NULL,\n"
+                         "    PRIMARY KEY (key)\n"
+                         ")",
+                         schema.has_column_ids() ? "0:" : "",
+                         schema.has_column_ids() ? "1:" : "",
+                         schema.has_column_ids() ? "2:" : ""),
               schema.ToString());
     EXPECT_EQ("key STRING NOT NULL", schema.column(0).ToString());
     EXPECT_EQ("UINT32 NULLABLE", schema.column(1).TypeToString());
@@ -145,38 +159,44 @@ TEST_F(TestSchema, TestCopyAndMove) {
   ColumnSchema col3("int32val", INT32);
 
   vector<ColumnSchema> cols = { col1, col2, col3 };
-  Schema schema(cols, 1);
-  check_schema(schema);
+  vector<ColumnId> ids = { ColumnId(0), ColumnId(1), ColumnId(2) };
+  const int kNumKeyCols = 1;
+
+  Schema schema = GetParam() == INCLUDE_COL_IDS ?
+                      Schema(cols, ids, kNumKeyCols) :
+                      Schema(cols, kNumKeyCols);
+
+  NO_FATALS(check_schema(schema));
 
   // Check copy- and move-assignment.
   Schema moved_schema;
   {
     Schema copied_schema = schema;
-    check_schema(copied_schema);
-    ASSERT_TRUE(copied_schema.Equals(schema));
+    NO_FATALS(check_schema(copied_schema));
+    ASSERT_TRUE(copied_schema.Equals(schema, Schema::COMPARE_ALL));
 
     // Move-assign to 'moved_to_schema' from 'copied_schema' and then let
-    // 'copied_schema' go out of scope to make sure none of 'moved_to_schema''s
+    // 'copied_schema' go out of scope to make sure none of the 'moved_schema'
     // resources are incorrectly freed.
     moved_schema = std::move(copied_schema);
 
     // 'copied_schema' is moved from so it should still be valid to call
     // ToString(), though we can't expect any particular result.
-    NO_FATALS(copied_schema.ToString()); // NOLINT(*)
+    copied_schema.ToString(); // NOLINT(*)
   }
-  check_schema(moved_schema);
-  ASSERT_TRUE(moved_schema.Equals(schema));
+  NO_FATALS(check_schema(moved_schema));
+  ASSERT_TRUE(moved_schema.Equals(schema, Schema::COMPARE_ALL));
 
   // Check copy- and move-construction.
   {
     Schema copied_schema(schema);
-    check_schema(copied_schema);
-    ASSERT_TRUE(copied_schema.Equals(schema));
+    NO_FATALS(check_schema(copied_schema));
+    ASSERT_TRUE(copied_schema.Equals(schema, Schema::COMPARE_ALL));
 
     Schema moved_schema(std::move(copied_schema));
-    NO_FATALS(copied_schema.ToString()); // NOLINT(*)
-    check_schema(moved_schema);
-    ASSERT_TRUE(moved_schema.Equals(schema));
+    copied_schema.ToString(); // NOLINT(*)
+    NO_FATALS(check_schema(moved_schema));
+    ASSERT_TRUE(moved_schema.Equals(schema, Schema::COMPARE_ALL));
   }
 }
 
diff --git a/src/kudu/common/schema.cc b/src/kudu/common/schema.cc
index 46f74a1..3ce4fb9 100644
--- a/src/kudu/common/schema.cc
+++ b/src/kudu/common/schema.cc
@@ -170,6 +170,7 @@ void Schema::CopyFrom(const Schema& other) {
   num_key_columns_ = other.num_key_columns_;
   cols_ = other.cols_;
   col_ids_ = other.col_ids_;
+  max_col_id_ = other.max_col_id_;
   col_offsets_ = other.col_offsets_;
   id_to_index_ = other.id_to_index_;
 
@@ -189,9 +190,10 @@ Schema::Schema(Schema&& other) noexcept
     : cols_(std::move(other.cols_)),
       num_key_columns_(other.num_key_columns_),
       col_ids_(std::move(other.col_ids_)),
+      max_col_id_(other.max_col_id_),
       col_offsets_(std::move(other.col_offsets_)),
       name_to_index_bytes_(0),
-      name_to_index_(/*bucket_count=*/10,
+      name_to_index_(/*bucket_count*/ 10,
                      NameToIndexMap::hasher(),
                      NameToIndexMap::key_equal(),
                      NameToIndexMapAllocator(&name_to_index_bytes_)),
@@ -215,6 +217,7 @@ Schema& Schema::operator=(Schema&& other) noexcept {
     cols_ = std::move(other.cols_);
     num_key_columns_ = other.num_key_columns_;
     col_ids_ = std::move(other.col_ids_);
+    max_col_id_ = other.max_col_id_;
     col_offsets_ = std::move(other.col_offsets_);
     id_to_index_ = std::move(other.id_to_index_);
     has_nullables_ = other.has_nullables_;
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index cd6dbc9..fd6b40e 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -747,15 +747,33 @@ class Schema {
   // so should only be used when necessary for output.
   std::string ToString(ToStringMode mode = ToStringMode::WITH_COLUMN_IDS) const;
 
+  // Compare column ids in Equals() method.
+  enum SchemaComparisonType {
+    COMPARE_COLUMNS = 1 << 0,
+    COMPARE_COLUMN_IDS = 1 << 1,
+
+    COMPARE_ALL = COMPARE_COLUMNS | COMPARE_COLUMN_IDS
+  };
+
   // Return true if the schemas have exactly the same set of columns
   // and respective types.
-  bool Equals(const Schema &other) const {
+  bool Equals(const Schema& other, SchemaComparisonType flags = COMPARE_COLUMNS) const {
     if (this == &other) return true;
-    if (this->num_key_columns_ != other.num_key_columns_) return false;
-    if (this->num_columns() != other.num_columns()) return false;
 
-    for (size_t i = 0; i < other.num_columns(); i++) {
-      if (!this->cols_[i].Equals(other.cols_[i])) return false;
+    if (flags & COMPARE_COLUMNS) {
+      if (this->num_key_columns_ != other.num_key_columns_) return false;
+      if (this->num_columns() != other.num_columns()) return false;
+      for (size_t i = 0; i < other.num_columns(); i++) {
+        if (!this->cols_[i].Equals(other.cols_[i])) return false;
+      }
+    }
+
+    if (flags & COMPARE_COLUMN_IDS) {
+      if (this->has_column_ids() != other.has_column_ids()) return false;
+      if (this->has_column_ids()) {
+        if (this->col_ids_ != other.col_ids_) return false;
+        if (this->max_col_id() != other.max_col_id()) return false;
+      }
     }
 
     return true;
@@ -918,8 +936,8 @@ class Schema {
 
   std::vector<ColumnSchema> cols_;
   size_t num_key_columns_;
-  ColumnId max_col_id_;
   std::vector<ColumnId> col_ids_;
+  ColumnId max_col_id_;
   std::vector<size_t> col_offsets_;
 
   // The keys of this map are StringPiece references to the actual name members of the


[kudu] 02/04: schema: Fix ColumnSchema::Equals() usage

Posted by mp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mpercy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit e3da9518787732f84d29ea7e9622745088ecdbba
Author: Mike Percy <mp...@apache.org>
AuthorDate: Tue Feb 26 10:45:36 2019 -0800

    schema: Fix ColumnSchema::Equals() usage
    
    In commit 4be39feef4907550c16f9d2b34430d098b9f1b21 the signature of
    ColumnSchema::Equals() was changed from taking a boolean flag for
    checking whether the defaults matched to taking an enum for describing a
    broader set of comparison options. However, a few call sites were not
    updated when that change was made, and due to type coercion several call
    sites ended up with weaker comparison semantics.
    
    This patch reinstates the behavior prior to the merge of the
    above-mentioned patch for the problematic call sites and updates the
    function signature of ColumnSchema::Equals() to treat the comparison
    enum as strongly typed to avoid similar problems in the future.
    
    Updated tests to add coverage for the relevant call sites.
    
    Change-Id: I416e4a44d0ed259d251d52a663b72acd79b46f33
    Reviewed-on: http://gerrit.cloudera.org:8080/12605
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/client/client-unittest.cc       | 19 +++++++++++++
 src/kudu/client/schema.cc                |  2 +-
 src/kudu/client/schema.h                 |  5 ++++
 src/kudu/common/column_predicate-test.cc | 48 ++++++++++++++++++++++++++++++++
 src/kudu/common/column_predicate.cc      |  4 +--
 src/kudu/common/column_predicate.h       |  9 ++++--
 src/kudu/common/schema-test.cc           |  3 +-
 src/kudu/common/schema.h                 |  8 +++---
 8 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/src/kudu/client/client-unittest.cc b/src/kudu/client/client-unittest.cc
index 3112ca8..3cf76f8 100644
--- a/src/kudu/client/client-unittest.cc
+++ b/src/kudu/client/client-unittest.cc
@@ -312,5 +312,24 @@ TEST(ClientUnitTest, TestKuduSchemaToStringWithColumnIds) {
             kudu_schema.ToString());
 }
 
+TEST(KuduColumnSchemaTest, TestEquals) {
+  KuduColumnSchema a32("a", KuduColumnSchema::INT32);
+  ASSERT_TRUE(a32.Equals(a32));
+
+  KuduColumnSchema a32_2(a32);
+  ASSERT_TRUE(a32.Equals(a32_2));
+
+  KuduColumnSchema b32("b", KuduColumnSchema::INT32);
+  ASSERT_FALSE(a32.Equals(b32));
+
+  KuduColumnSchema a16("a", KuduColumnSchema::INT16);
+  ASSERT_FALSE(a32.Equals(a16));
+
+  const int kDefaultOf7 = 7;
+  KuduColumnSchema a32_dflt("a", KuduColumnSchema::INT32, /*is_nullable=*/false,
+                              /*default_value=*/&kDefaultOf7);
+  ASSERT_FALSE(a32.Equals(a32_dflt));
+}
+
 } // namespace client
 } // namespace kudu
diff --git a/src/kudu/client/schema.cc b/src/kudu/client/schema.cc
index cdc510c..c75a293 100644
--- a/src/kudu/client/schema.cc
+++ b/src/kudu/client/schema.cc
@@ -647,7 +647,7 @@ void KuduColumnSchema::CopyFrom(const KuduColumnSchema& other) {
 bool KuduColumnSchema::Equals(const KuduColumnSchema& other) const {
   return this == &other ||
     col_ == other.col_ ||
-    (col_ != nullptr && col_->Equals(*other.col_, true));
+    (col_ != nullptr && col_->Equals(*other.col_, ColumnSchema::COMPARE_ALL));
 }
 
 const std::string& KuduColumnSchema::name() const {
diff --git a/src/kudu/client/schema.h b/src/kudu/client/schema.h
index 217cf1f..5495ab7 100644
--- a/src/kudu/client/schema.h
+++ b/src/kudu/client/schema.h
@@ -27,6 +27,7 @@
 #include <vector>
 
 #ifdef KUDU_HEADERS_NO_STUBS
+#include <gtest/gtest_prod.h>
 #include "kudu/gutil/port.h"
 #else
 #include "kudu/client/stubs.h"
@@ -248,6 +249,10 @@ class KUDU_EXPORT KuduColumnSchema {
   // is transitive to nested classes. See https://s.apache.org/inner-class-friends
   friend class KuduTableAlterer;
 
+#ifdef KUDU_HEADERS_NO_STUBS
+  FRIEND_TEST(KuduColumnSchemaTest, TestEquals);
+#endif
+
   KuduColumnSchema();
 
 #if defined(__clang__) || \
diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc
index e0b6b63..e40e1a4 100644
--- a/src/kudu/common/column_predicate-test.cc
+++ b/src/kudu/common/column_predicate-test.cc
@@ -31,6 +31,7 @@
 #include "kudu/common/types.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/bloom_filter.h"
+#include "kudu/util/hash.pb.h"
 #include "kudu/util/int128.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/random.h"
@@ -1448,4 +1449,51 @@ TEST_F(TestColumnPredicate, TestBloomFilterMerge) {
   TestMergeBloomFilterCombinations(ColumnSchema("c", STRING, true), &bfs, binary_keys);
 }
 
+// Test ColumnPredicate operator (in-)equality.
+TEST_F(TestColumnPredicate, TestEquals) {
+  ColumnSchema c1("c1", INT32, true);
+  ASSERT_EQ(ColumnPredicate::None(c1), ColumnPredicate::None(c1));
+
+  ColumnSchema c1a("c1", INT32, true);
+  ASSERT_EQ(ColumnPredicate::None(c1), ColumnPredicate::None(c1a));
+
+  ColumnSchema c2("c2", INT32, true);
+  ASSERT_NE(ColumnPredicate::None(c1), ColumnPredicate::None(c2));
+
+  ColumnSchema c1string("c1", STRING, true);
+  ASSERT_NE(ColumnPredicate::None(c1), ColumnPredicate::None(c1string));
+
+  const int kDefaultOf3 = 3;
+  ColumnSchema c1dflt("c1", INT32, /*is_nullable=*/false, /*read_default=*/&kDefaultOf3);
+  ASSERT_NE(ColumnPredicate::None(c1), ColumnPredicate::None(c1dflt));
+}
+
+using TestColumnPredicateDeathTest = TestColumnPredicate;
+
+// Ensure that ColumnPredicate::Merge(other) requires the 'other' predicate to
+// have the same column name and type as 'this'.
+TEST_F(TestColumnPredicateDeathTest, TestMergeRequiresNameAndType) {
+
+  ColumnSchema c1int32("c1", INT32, true);
+  ColumnSchema c2int32("c2", INT32, true);
+  vector<int32_t> values = { 0, 1, 2, 3 };
+
+  EXPECT_DEATH({
+    // This should crash because the columns have different names.
+    TestMerge(ColumnPredicate::Equality(c1int32, &values[0]),
+              ColumnPredicate::Equality(c2int32, &values[0]),
+              ColumnPredicate::None(c1int32), // unused
+              PredicateType::None);
+  }, "COMPARE_NAME_AND_TYPE");
+
+  ColumnSchema c1int16("c1", INT16, true);
+  EXPECT_DEATH({
+    // This should crash because the columns have different types.
+    TestMerge(ColumnPredicate::Equality(c1int32, &values[0]),
+              ColumnPredicate::Equality(c1int16, &values[0]),
+              ColumnPredicate::None(c1int32), // unused
+              PredicateType::None);
+  }, "COMPARE_NAME_AND_TYPE");
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index b923ff4..5f9e7be 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -291,7 +291,7 @@ void ColumnPredicate::Simplify() {
 }
 
 void ColumnPredicate::Merge(const ColumnPredicate& other) {
-  CHECK(column_.Equals(other.column_, false));
+  CHECK(column_.Equals(other.column_, ColumnSchema::COMPARE_NAME_AND_TYPE));
   switch (predicate_type_) {
     case PredicateType::None: return;
     case PredicateType::Range: {
@@ -822,7 +822,7 @@ string ColumnPredicate::ToString() const {
 }
 
 bool ColumnPredicate::operator==(const ColumnPredicate& other) const {
-  if (!column_.Equals(other.column_, false)) { return false; }
+  if (!column_.Equals(other.column_, ColumnSchema::COMPARE_NAME_AND_TYPE)) { return false; }
   if (predicate_type_ != other.predicate_type_) {
     return false;
   }
diff --git a/src/kudu/common/column_predicate.h b/src/kudu/common/column_predicate.h
index ea8385b..2527fe1 100644
--- a/src/kudu/common/column_predicate.h
+++ b/src/kudu/common/column_predicate.h
@@ -17,10 +17,9 @@
 
 #pragma once
 
+#include <algorithm>
 #include <cstddef>
 #include <cstdint>
-
-#include <algorithm>
 #include <ostream>
 #include <string>
 #include <vector>
@@ -32,6 +31,7 @@
 #include "kudu/common/schema.h"
 #include "kudu/common/types.h"
 #include "kudu/util/bloom_filter.h"
+#include "kudu/util/hash.pb.h"
 #include "kudu/util/slice.h"
 
 namespace kudu {
@@ -233,6 +233,11 @@ class ColumnPredicate {
   // Predicates over different columns are not equal.
   bool operator==(const ColumnPredicate& other) const;
 
+  // Negation of operator==.
+  bool operator!=(const ColumnPredicate& other) const {
+    return !(*this == other);
+  }
+
   // Returns the raw lower bound value if this is a range predicate, or the
   // equality value if this is an equality predicate.
   const void* raw_lower() const {
diff --git a/src/kudu/common/schema-test.cc b/src/kudu/common/schema-test.cc
index a35e3be..69f70ca 100644
--- a/src/kudu/common/schema-test.cc
+++ b/src/kudu/common/schema-test.cc
@@ -303,8 +303,7 @@ TEST_F(TestSchema, TestSchemaEquals) {
   ASSERT_FALSE(schema2.KeyTypeEquals(schema3));
   ASSERT_FALSE(schema3.Equals(schema4));
   ASSERT_TRUE(schema4.Equals(schema4));
-  ASSERT_TRUE(schema3.KeyEquals(schema4,
-              ColumnSchema::COMPARE_NAME | ColumnSchema::COMPARE_TYPE));
+  ASSERT_TRUE(schema3.KeyEquals(schema4, ColumnSchema::COMPARE_NAME_AND_TYPE));
 }
 
 TEST_F(TestSchema, TestReset) {
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index fd6b40e..0eb2d67 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -300,16 +300,17 @@ class ColumnSchema {
   }
 
   // compare types in Equals function
-  enum {
+  enum CompareFlags {
     COMPARE_NAME = 1 << 0,
     COMPARE_TYPE = 1 << 1,
     COMPARE_DEFAULTS = 1 << 2,
 
+    COMPARE_NAME_AND_TYPE = COMPARE_NAME | COMPARE_TYPE,
     COMPARE_ALL = COMPARE_NAME | COMPARE_TYPE | COMPARE_DEFAULTS
   };
 
   bool Equals(const ColumnSchema &other,
-              int flags = COMPARE_ALL) const {
+              CompareFlags flags = COMPARE_ALL) const {
     if (this == &other) return true;
 
     if ((flags & COMPARE_NAME) && this->name_ != other.name_)
@@ -782,8 +783,7 @@ class Schema {
   // Return true if the key projection schemas have exactly the same set of
   // columns and respective types.
   bool KeyEquals(const Schema& other,
-                 int flags
-                    = ColumnSchema::COMPARE_NAME | ColumnSchema::COMPARE_TYPE) const {
+                 ColumnSchema::CompareFlags flags = ColumnSchema::COMPARE_NAME_AND_TYPE) const {
     if (this == &other) return true;
     if (this->num_key_columns_ != other.num_key_columns_) return false;
     for (size_t i = 0; i < this->num_key_columns_; i++) {


[kudu] 04/04: KUDU-2645: schema: Validate that defaults are provided for virtual columns

Posted by mp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mpercy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8f850190c5255ab2d072af1155bc674f84986b41
Author: Mike Percy <mp...@apache.org>
AuthorDate: Fri Feb 8 01:43:43 2019 -0800

    KUDU-2645: schema: Validate that defaults are provided for virtual columns
    
    Virtual columns must not be nullable and must have read defaults.
    Instead of crashing when we encounter violations of these requirements,
    return a bad status code when mapping such a projection onto a tablet
    schema.
    
    TODO(KUDU-2692): Consider removing the requirement to specify a read
    default on a virtual column by defining a built-in read default for each
    virtual column type, such as a default of false for IS_DELETED.
    
    Change-Id: I9144390f9ed02789a1ea161277b5f1567cfc2ffe
    Reviewed-on: http://gerrit.cloudera.org:8080/12416
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/common/schema-test.cc | 24 +++++++++++++++++++++++-
 src/kudu/common/schema.cc      |  9 +++++++++
 src/kudu/common/schema.h       |  1 +
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/kudu/common/schema-test.cc b/src/kudu/common/schema-test.cc
index a66323e..ee428f7 100644
--- a/src/kudu/common/schema-test.cc
+++ b/src/kudu/common/schema-test.cc
@@ -448,8 +448,10 @@ TEST_F(TestSchema, TestGetMappedReadProjection) {
                        { ColumnId(0),
                          ColumnId(1) },
                        1);
+  const bool kReadDefault = false;
   Schema projection({ ColumnSchema("key", STRING),
-                      ColumnSchema("deleted", IS_DELETED) },
+                      ColumnSchema("deleted", IS_DELETED,
+                                   /*is_nullable=*/false, /*read_default=*/&kReadDefault) },
                     1);
 
   Schema mapped;
@@ -470,6 +472,26 @@ TEST_F(TestSchema, TestGetMappedReadProjection) {
   ASSERT_EQ("deleted", mapped.column(1).name());
   ASSERT_GT(mapped.column_id(1), tablet_schema.column_id(1));
   ASSERT_GT(mapped.max_col_id(), tablet_schema.max_col_id());
+
+  // Ensure that virtual columns that are nullable or that do not have read
+  // defaults are rejected.
+  Schema nullable_projection({ ColumnSchema("key", STRING),
+                               ColumnSchema("deleted", IS_DELETED,
+                                            /*is_nullable=*/true,
+                                            /*read_default=*/&kReadDefault) },
+                          1);
+  Status s = tablet_schema.GetMappedReadProjection(nullable_projection, &mapped);
+  ASSERT_FALSE(s.ok());
+  ASSERT_STR_CONTAINS(s.ToString(), "must not be nullable");
+
+  Schema no_default_projection({ ColumnSchema("key", STRING),
+                                 ColumnSchema("deleted", IS_DELETED,
+                                              /*is_nullable=*/false,
+                                              /*read_default=*/nullptr) },
+                               1);
+  s = tablet_schema.GetMappedReadProjection(no_default_projection, &mapped);
+  ASSERT_FALSE(s.ok());
+  ASSERT_STR_CONTAINS(s.ToString(), "must have a default value for read");
 }
 
 // Test that the schema can be used to compare and stringify rows.
diff --git a/src/kudu/common/schema.cc b/src/kudu/common/schema.cc
index a035fda..3e4132e 100644
--- a/src/kudu/common/schema.cc
+++ b/src/kudu/common/schema.cc
@@ -401,6 +401,15 @@ Status Schema::GetMappedReadProjection(const Schema& projection,
     int index = find_column(col.name());
     if (col.type_info()->is_virtual()) {
       DCHECK_EQ(kColumnNotFound, index) << "virtual column not expected in tablet schema";
+      if (col.is_nullable()) {
+        return Status::InvalidArgument(Substitute("Virtual column $0 $1 must not be nullable",
+                                                  col.name(), col.TypeToString()));
+      }
+      if (!col.has_read_default()) {
+        return Status::InvalidArgument(Substitute("Virtual column $0 $1 must "
+                                                  "have a default value for read",
+                                                  col.name(), col.TypeToString()));
+      }
       mapped_cols.push_back(col);
       // Generate a "fake" column id for virtual columns.
       mapped_ids.emplace_back(++proj_max_col_id);
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index 0eb2d67..322e87c 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -904,6 +904,7 @@ class Schema {
       if (col.type_info()->type() == IS_DELETED) {
         // Enforce some properties on the virtual column that simplify our
         // implementation.
+        // TODO(KUDU-2692): Consider removing these requirements.
         DCHECK(!col.is_nullable());
         DCHECK(col.has_read_default());