You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/03/17 04:31:35 UTC

[1/3] incubator-kudu git commit: [docs] Improve formatting of CREATE TABLE statements

Repository: incubator-kudu
Updated Branches:
  refs/heads/master 830c3e4fe -> d7de21009


[docs] Improve formatting of CREATE TABLE statements

Change-Id: Ic9e3b3c196caeaa6bb7132ad02e6528e3bb342cd
Reviewed-on: http://gerrit.cloudera.org:8080/2523
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <da...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/9f93dd4f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/9f93dd4f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/9f93dd4f

Branch: refs/heads/master
Commit: 9f93dd4f0f07696fb8140a58bd88a1acb6b04261
Parents: 830c3e4
Author: Misty Stanley-Jones <ms...@cloudera.com>
Authored: Thu Mar 10 09:49:59 2016 -0800
Committer: Misty Stanley-Jones <mi...@apache.org>
Committed: Thu Mar 17 02:30:02 2016 +0000

----------------------------------------------------------------------
 docs/kudu_impala_integration.adoc | 100 ++++++++++++++++++++-------------
 1 file changed, 62 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9f93dd4f/docs/kudu_impala_integration.adoc
----------------------------------------------------------------------
diff --git a/docs/kudu_impala_integration.adoc b/docs/kudu_impala_integration.adoc
index 9444343..9dff8b7 100755
--- a/docs/kudu_impala_integration.adoc
+++ b/docs/kudu_impala_integration.adoc
@@ -367,9 +367,10 @@ buckets by hashing the `id` column, for simplicity. See
 
 [source,sql]
 ----
-CREATE TABLE my_first_table (
-id BIGINT,
-name STRING
+CREATE TABLE my_first_table
+(
+  id BIGINT,
+  name STRING
 )
 DISTRIBUTE BY HASH (id) INTO 16 BUCKETS
 TBLPROPERTIES(
@@ -408,7 +409,8 @@ TBLPROPERTIES(
   'kudu.table_name' = 'new_table',
   'kudu.master_addresses' = 'kudu-master.example.com:7051',
   'kudu.key_columns' = 'ts, name'
-) AS SELECT * FROM old_table;
+)
+AS SELECT * FROM old_table;
 ----
 [NOTE]
 ====
@@ -454,13 +456,18 @@ CREATE TABLE cust_behavior (
   fulfilled_date BIGINT
 )
 DISTRIBUTE BY RANGE(_id)
-SPLIT ROWS((1439560049342), (1439566253755), (1439572458168), (1439578662581), (1439584866994), (1439591071407))
+SPLIT ROWS((1439560049342),
+           (1439566253755),
+           (1439572458168),
+           (1439578662581),
+           (1439584866994),
+           (1439591071407))
 TBLPROPERTIES(
-'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
-'kudu.table_name' = 'cust_behavior',
-'kudu.master_addresses' = 'a1216.halxg.cloudera.com:7051',
-'kudu.key_columns' = '_id',
-'kudu.num_tablet_replicas' = '3',
+  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
+  'kudu.table_name' = 'cust_behavior',
+  'kudu.master_addresses' = 'a1216.halxg.cloudera.com:7051',
+  'kudu.key_columns' = '_id',
+  'kudu.num_tablet_replicas' = '3',
 );
 ----
 
@@ -487,8 +494,8 @@ use the following SQL:
 CREATE DATABASE impala_kudu
 USE impala_kudu;
 CREATE TABLE my_first_table (
-id BIGINT,
-name STRING
+  id BIGINT,
+  name STRING
 )
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
@@ -597,13 +604,19 @@ CREATE TABLE customers (
   state STRING,
   name STRING,
   purchase_count int32,
-) DISTRIBUTE BY RANGE(state)
-SPLIT ROWS(('al'), ('ak'), ('ar'), .., ('wv'), ('wy'))
+)
+DISTRIBUTE BY RANGE(state)
+SPLIT ROWS(('al'),
+           ('ak'),
+           ('ar'),
+           ...
+           ('wv'),
+           ('wy'))
 TBLPROPERTIES(
-'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
-'kudu.table_name' = 'customers',
-'kudu.master_addresses' = 'kudu-master.example.com:7051',
-'kudu.key_columns' = 'state, name'
+  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
+  'kudu.table_name' = 'customers',
+  'kudu.master_addresses' = 'kudu-master.example.com:7051',
+  'kudu.key_columns' = 'state, name'
 );
 ----
 
@@ -655,10 +668,10 @@ CREATE TABLE cust_behavior (
 )
 DISTRIBUTE BY HASH (id) INTO 16 BUCKETS
 TBLPROPERTIES(
-'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
-'kudu.table_name' = 'cust_behavior',
-'kudu.master_addresses' = 'kudu-master.example.com:7051',
-'kudu.key_columns' = 'id, sku'
+  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
+  'kudu.table_name' = 'cust_behavior',
+  'kudu.master_addresses' = 'kudu-master.example.com:7051',
+  'kudu.key_columns' = 'id, sku'
 );
 ----
 
@@ -687,13 +700,20 @@ CREATE TABLE customers (
   state STRING,
   name STRING,
   purchase_count int32,
-) DISTRIBUTE BY RANGE(state, name)
-SPLIT ROWS(('al', ''), ('al', 'm'), ('ak', ''), ('ak', 'm'), .., ('wy', ''), ('wy', 'm'))
+)
+DISTRIBUTE BY RANGE(state, name)
+  SPLIT ROWS(('al', ''),
+             ('al', 'm'),
+             ('ak', ''),
+             ('ak', 'm'),
+             ...
+             ('wy', ''),
+             ('wy', 'm'))
 TBLPROPERTIES(
-'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
-'kudu.table_name' = 'customers',
-'kudu.master_addresses' = 'kudu-master.example.com:7051',
-'kudu.key_columns' = 'state, name'
+  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
+  'kudu.table_name' = 'customers',
+  'kudu.master_addresses' = 'kudu-master.example.com:7051',
+  'kudu.key_columns' = 'state, name'
 );
 ----
 
@@ -726,12 +746,15 @@ CREATE TABLE cust_behavior (
   fulfilled_date BIGINT
 )
 DISTRIBUTE BY HASH (id) INTO 4 BUCKETS,
-RANGE (sku) SPLIT ROWS(('g'), ('o'), ('u'))
+RANGE (sku)
+  SPLIT ROWS(('g'),
+             ('o'),
+             ('u'))
 TBLPROPERTIES(
-'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
-'kudu.table_name' = 'cust_behavior',
-'kudu.master_addresses' = 'kudu-master.example.com:7051',
-'kudu.key_columns' = 'id, sku'
+  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
+  'kudu.table_name' = 'cust_behavior',
+  'kudu.master_addresses' = 'kudu-master.example.com:7051',
+  'kudu.key_columns' = 'id, sku'
 );
 ----
 
@@ -758,12 +781,13 @@ CREATE TABLE cust_behavior (
   rating INT,
   fulfilled_date BIGINT
 )
-DISTRIBUTE BY HASH (id) INTO 4 BUCKETS, HASH (sku) INTO 4 BUCKETS
+DISTRIBUTE BY HASH (id) INTO 4 BUCKETS,
+              HASH (sku) INTO 4 BUCKETS
 TBLPROPERTIES(
-'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
-'kudu.table_name' = 'cust_behavior',
-'kudu.master_addresses' = 'kudu-master.example.com:7051',
-'kudu.key_columns' = 'id, sku'
+  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
+  'kudu.table_name' = 'cust_behavior',
+  'kudu.master_addresses' = 'kudu-master.example.com:7051',
+  'kudu.key_columns' = 'id, sku'
 );
 ----
 


[3/3] incubator-kudu git commit: wire_protocol-test: make string data more realistic

Posted by to...@apache.org.
wire_protocol-test: make string data more realistic

This test has had weirdly bimodal performance since it was introduced -- some
builds are consistently fast, and others are consistently slow, even if the two
builds had no changes to related code in between them.[1]

After much investigation, I determined that the "slow" builds spend a lot more
cycles waiting on L1D cache misses. It seems like the performance depends on
whether the constant string data happens to cross a page boundary. The string
data for the benchmark can move around in the constant section of the binary
(AKA ".rodata") based on other unrelated changes. Since the test was only
using a single string pointer, repeated thousands of times, the location
of that string was major factor in the benchmark result.

The "loads which split a page boundary are slow" issue is only vaguely
documented around the web or in Intel's optimization guides. I found one test
program[2] which demonstrates this effect. 16-byte loads which cross a page
boundary are ~62x slower than those which don't on my Haswell laptop.

In realistic scenarios, the string data would be a separate pointer for each
row, so if the occasional string crosses a page boundary, it won't be so
big a deal as this benchmark makes it out to be. So, this patch just
changes the benchmark to be more realistic and make new copies of the strings
for each row, at different memory locations.

Only time will tell, but my guess is that this will remove the weird
performance variance we've been seeing on this benchmark and make it easier to
evaluate actual changes to this code path.

[1] http://i.imgur.com/gZRBJ8S.png
[2] http://akuvian.org/src/pagesplit.c

Change-Id: I22fc1b1a83ac57c819f320dce6ffc430ddf5662b
Reviewed-on: http://gerrit.cloudera.org:8080/2532
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <da...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/d7de2100
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/d7de2100
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/d7de2100

Branch: refs/heads/master
Commit: d7de2100941bf6c3f472fe8f1022ff83b828d83c
Parents: 99856b7
Author: Todd Lipcon <to...@apache.org>
Authored: Fri Mar 11 17:23:18 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Mar 17 03:31:12 2016 +0000

----------------------------------------------------------------------
 src/kudu/common/wire_protocol-test.cc | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/d7de2100/src/kudu/common/wire_protocol-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/wire_protocol-test.cc b/src/kudu/common/wire_protocol-test.cc
index 8091942..502b6e4 100644
--- a/src/kudu/common/wire_protocol-test.cc
+++ b/src/kudu/common/wire_protocol-test.cc
@@ -30,25 +30,36 @@ namespace kudu {
 class WireProtocolTest : public KuduTest {
  public:
   WireProtocolTest()
-    : schema_({ ColumnSchema("col1", STRING),
-                ColumnSchema("col2", STRING),
-                ColumnSchema("col3", UINT32, true /* nullable */) },
-              1) {
+      : schema_({ ColumnSchema("col1", STRING),
+              ColumnSchema("col2", STRING),
+              ColumnSchema("col3", UINT32, true /* nullable */) },
+        1),
+        test_data_arena_(4096, 256 * 1024) {
   }
 
   void FillRowBlockWithTestRows(RowBlock* block) {
+    test_data_arena_.Reset();
     block->selection_vector()->SetAllTrue();
 
     for (int i = 0; i < block->nrows(); i++) {
       RowBlockRow row = block->row(i);
-      *reinterpret_cast<Slice*>(row.mutable_cell_ptr(0)) = Slice("hello world col1");
-      *reinterpret_cast<Slice*>(row.mutable_cell_ptr(1)) = Slice("hello world col2");
+
+      // We make new copies of these strings into the Arena for each row so that
+      // the workload is more realistic. If we just re-use the same Slice object
+      // for each row, the memory accesses fit entirely into a smaller number of
+      // cache lines and we may micro-optimize for the wrong thing.
+      Slice col1, col2;
+      CHECK(test_data_arena_.RelocateSlice("hello world col1", &col1));
+      CHECK(test_data_arena_.RelocateSlice("hello world col2", &col2));
+      *reinterpret_cast<Slice*>(row.mutable_cell_ptr(0)) = col1;
+      *reinterpret_cast<Slice*>(row.mutable_cell_ptr(1)) = col2;
       *reinterpret_cast<uint32_t*>(row.mutable_cell_ptr(2)) = i;
       row.cell(2).set_null(false);
     }
   }
  protected:
   Schema schema_;
+  Arena test_data_arena_;
 };
 
 TEST_F(WireProtocolTest, TestOKStatus) {


[2/3] incubator-kudu git commit: KUDU-418 (part 1): enforce that we don't support changing TS hosts/ports

Posted by to...@apache.org.
KUDU-418 (part 1): enforce that we don't support changing TS hosts/ports

This adds a check in the master that attempts to reject tablet servers which
have changed host/port. We don't currently support this, so allowing them to
register currently just ends up hitting other more hard-to-diagnose issues
later on.

This check will end up being rolled back when we actually implement a proper
fix for KUDU-418. In the meantime, this would have helped me troubleshoot some
issues with client-test where we hit this issue.

Change-Id: I16d9a91059b7dde63a2cac71e1fde0228b6a9f3d
Reviewed-on: http://gerrit.cloudera.org:8080/2513
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/99856b73
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/99856b73
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/99856b73

Branch: refs/heads/master
Commit: 99856b73ec6d776ca40a7abf46d0691bb64e95b1
Parents: 9f93dd4
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Mar 9 17:25:59 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Mar 17 03:30:39 2016 +0000

----------------------------------------------------------------------
 src/kudu/master/master-test.cc   | 15 +++++++++++++++
 src/kudu/master/ts_descriptor.cc | 13 +++++++++++++
 2 files changed, 28 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/99856b73/src/kudu/master/master-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index aeebbf1..e1c8dc9 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -209,6 +209,21 @@ TEST_F(MasterTest, TestRegisterAndHeartbeat) {
     ASSERT_EQ("my-ts-uuid", resp.servers(0).instance_id().permanent_uuid());
     ASSERT_EQ(1, resp.servers(0).instance_id().instance_seqno());
   }
+
+  // Ensure that trying to re-register with a different port fails.
+  {
+    TSHeartbeatRequestPB req;
+    TSHeartbeatResponsePB resp;
+    RpcController rpc;
+    req.mutable_common()->CopyFrom(common);
+    req.mutable_registration()->CopyFrom(fake_reg);
+    req.mutable_registration()->mutable_rpc_addresses(0)->set_port(1001);
+    Status s = proxy_->TSHeartbeat(req, &resp, &rpc);
+    ASSERT_TRUE(s.IsRemoteError());
+    ASSERT_STR_CONTAINS(s.ToString(),
+                        "Tablet server my-ts-uuid is attempting to re-register "
+                        "with a different host/port.");
+  }
 }
 
 Status MasterTest::CreateTable(const string& table_name,

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/99856b73/src/kudu/master/ts_descriptor.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc
index d92a600..7dbf5cc 100644
--- a/src/kudu/master/ts_descriptor.cc
+++ b/src/kudu/master/ts_descriptor.cc
@@ -61,6 +61,19 @@ Status TSDescriptor::Register(const NodeInstancePB& instance,
   boost::lock_guard<simple_spinlock> l(lock_);
   CHECK_EQ(instance.permanent_uuid(), permanent_uuid_);
 
+  // TODO(KUDU-418): we don't currently support changing IPs or hosts since the
+  // host/port is stored persistently in each tablet's metadata.
+  if (registration_ && registration_->ShortDebugString() != registration.ShortDebugString()) {
+    string msg = strings::Substitute(
+        "Tablet server $0 is attempting to re-register with a different host/port. "
+        "This is not currently supported. Old: {$1} New: {$2}",
+        instance.permanent_uuid(),
+        registration_->ShortDebugString(),
+        registration.ShortDebugString());
+    LOG(ERROR) << msg;
+    return Status::InvalidArgument(msg);
+  }
+
   if (instance.instance_seqno() < latest_seqno_) {
     return Status::AlreadyPresent(
       strings::Substitute("Cannot register with sequence number $0:"