You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2019/08/24 00:59:09 UTC

[kudu] branch master updated (0f38821 -> 8af4883)

This is an automated email from the ASF dual-hosted git repository.

awong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 0f38821  [thirdparty] introduce chrony
     new ba80989  [test] handle ScanTableToStrings() result status
     new 8124d94  [docs]: Fix bad markdown formatting in tablet.md
     new 7116a08  KUDU-2847: Optimize iteration over selection vector in SerializeRowBlock
     new 81353eb  [util] Add ExtractFloat method to JsonReader and improve ExtractDouble method
     new 8af4883  test: deflake ksck_remote-test TestChecksumSnapshotCurrentTimestamp

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/design-docs/tablet.md                         |   4 +-
 src/kudu/client/client-test-util.cc                |  19 ++--
 src/kudu/client/client-test-util.h                 |   9 +-
 src/kudu/client/client-test.cc                     | 116 ++++++++++-----------
 src/kudu/common/rowblock-test.cc                   |  21 ++++
 src/kudu/common/rowblock.cc                        |  30 ++++++
 src/kudu/common/rowblock.h                         |   4 +
 src/kudu/common/wire_protocol-test.cc              |  27 +++--
 src/kudu/common/wire_protocol.cc                   |  80 +++++++-------
 src/kudu/integration-tests/alter_table-test.cc     |   2 +-
 .../integration-tests/flex_partitioning-itest.cc   |   4 +-
 src/kudu/tools/ksck_remote-test.cc                 |  12 ++-
 src/kudu/tools/tool_action_table.cc                |   6 +-
 src/kudu/util/jsonreader-test.cc                   |  58 ++++++++++-
 src/kudu/util/jsonreader.cc                        |  46 ++++++--
 src/kudu/util/jsonreader.h                         |   6 ++
 16 files changed, 299 insertions(+), 145 deletions(-)


[kudu] 04/05: [util] Add ExtractFloat method to JsonReader and improve ExtractDouble method

Posted by aw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 81353eb38d86694eefeb5fbacef300079f2f58c6
Author: zhangyifan27 <ch...@163.com>
AuthorDate: Mon Aug 19 17:51:42 2019 +0800

    [util] Add ExtractFloat method to JsonReader and improve ExtractDouble method
    
    ExtractDouble method should not only parse double values, int values
    that can be losslessly converted to double should also be parsed.
    
    Though there is already IsLosslessFloat() in rapidjson library,
    the implementation has some precision issues.
    This patch re-implement the method.
    
    Change-Id: Ica0a04fb21daceaa44233995a42a60686f59361b
    Reviewed-on: http://gerrit.cloudera.org:8080/14114
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/tools/tool_action_table.cc |  6 ++--
 src/kudu/util/jsonreader-test.cc    | 58 +++++++++++++++++++++++++++++++++++--
 src/kudu/util/jsonreader.cc         | 46 ++++++++++++++++++++++++-----
 src/kudu/util/jsonreader.h          |  6 ++++
 4 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/src/kudu/tools/tool_action_table.cc b/src/kudu/tools/tool_action_table.cc
index 5739186..6fcd83c 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -708,10 +708,10 @@ Status ParseValueOfType(const string& default_value,
       break;
     }
     case KuduColumnSchema::DataType::FLOAT: {
-      double double_value;
+      float float_value;
       RETURN_NOT_OK_PREPEND(
-        reader.ExtractDouble(values[0], /*field=*/nullptr, &double_value), msg);
-      *value = KuduValue::FromFloat(double_value);
+        reader.ExtractFloat(values[0], /*field=*/nullptr, &float_value), msg);
+      *value = KuduValue::FromFloat(float_value);
       break;
     }
     case KuduColumnSchema::DataType::DOUBLE: {
diff --git a/src/kudu/util/jsonreader-test.cc b/src/kudu/util/jsonreader-test.cc
index b321538..0b4a58c 100644
--- a/src/kudu/util/jsonreader-test.cc
+++ b/src/kudu/util/jsonreader-test.cc
@@ -59,6 +59,7 @@ TEST(JsonReaderTest, Empty) {
   ASSERT_TRUE(r.ExtractUint32(r.root(), "foo", nullptr).IsNotFound());
   ASSERT_TRUE(r.ExtractUint64(r.root(), "foo", nullptr).IsNotFound());
   ASSERT_TRUE(r.ExtractDouble(r.root(), "foo", nullptr).IsNotFound());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), "foo", nullptr).IsNotFound());
   ASSERT_TRUE(r.ExtractString(r.root(), "foo", nullptr).IsNotFound());
   ASSERT_TRUE(r.ExtractObject(r.root(), "foo", nullptr).IsNotFound());
   ASSERT_TRUE(r.ExtractObjectArray(r.root(), "foo", nullptr).IsNotFound());
@@ -78,6 +79,7 @@ TEST(JsonReaderTest, Basic) {
   ASSERT_TRUE(r.ExtractUint32(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractDouble(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObject(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObjectArray(r.root(), "foo", nullptr).IsInvalidArgument());
 }
@@ -121,6 +123,7 @@ TEST(JsonReaderTest, LessBasic) {
   ASSERT_TRUE(r.ExtractUint32(r.root(), "null", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), "null", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractDouble(r.root(), "null", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), "null", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObject(r.root(), "null", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObjectArray(r.root(), "null", nullptr).IsInvalidArgument());
 
@@ -130,6 +133,7 @@ TEST(JsonReaderTest, LessBasic) {
   ASSERT_TRUE(r.ExtractUint32(r.root(), "empty", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), "empty", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractDouble(r.root(), "empty", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), "empty", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObject(r.root(), "empty", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObjectArray(r.root(), "empty", nullptr).IsInvalidArgument());
 
@@ -138,6 +142,7 @@ TEST(JsonReaderTest, LessBasic) {
   ASSERT_TRUE(r.ExtractUint32(r.root(), "bool", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), "bool", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractDouble(r.root(), "bool", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), "bool", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractString(r.root(), "bool", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObject(r.root(), "bool", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObjectArray(r.root(), "bool", nullptr).IsInvalidArgument());
@@ -168,6 +173,12 @@ TEST(JsonReaderTest, SignedAndUnsignedInts) {
   ASSERT_EQ(-1, negative64);
   ASSERT_TRUE(r.ExtractUint32(r.root(), negative, nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), negative, nullptr).IsInvalidArgument());
+  double negative_double;
+  ASSERT_OK(r.ExtractDouble(r.root(), negative, &negative_double));
+  ASSERT_EQ(-1, negative_double);
+  float negative_float;
+  ASSERT_OK(r.ExtractFloat(r.root(), negative, &negative_float));
+  ASSERT_EQ(-1, negative_float);
 
   // Max signed 32-bit integer.
   const char* const signed_big32 = "signed_big32";
@@ -183,6 +194,10 @@ TEST(JsonReaderTest, SignedAndUnsignedInts) {
   uint64_t signed_big32_uint64;
   ASSERT_OK(r.ExtractUint64(r.root(), signed_big32, &signed_big32_uint64));
   ASSERT_EQ(kMaxInt32, signed_big32_uint64);
+  double signed_big32_double;
+  ASSERT_OK(r.ExtractDouble(r.root(), signed_big32, &signed_big32_double));
+  ASSERT_EQ(kMaxInt32, signed_big32_double);
+  ASSERT_TRUE(r.ExtractFloat(r.root(), signed_big32, nullptr).IsInvalidArgument());
 
   // Max signed 64-bit integer.
   const char* const signed_big64 = "signed_big64";
@@ -195,6 +210,7 @@ TEST(JsonReaderTest, SignedAndUnsignedInts) {
   ASSERT_OK(r.ExtractUint64(r.root(), signed_big64, &signed_big64_uint64));
   ASSERT_EQ(kMaxInt64, signed_big64_uint64);
   ASSERT_TRUE(r.ExtractDouble(r.root(), signed_big64, nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), signed_big64, nullptr).IsInvalidArgument());
 
   // Max unsigned 32-bit integer.
   const char* const unsigned_big32 = "unsigned_big32";
@@ -208,6 +224,10 @@ TEST(JsonReaderTest, SignedAndUnsignedInts) {
   uint64_t unsigned_big32_uint64;
   ASSERT_OK(r.ExtractUint64(r.root(), unsigned_big32, &unsigned_big32_uint64));
   ASSERT_EQ(kMaxUint32, unsigned_big32_uint64);
+  double unsigned_big32_double;
+  ASSERT_OK(r.ExtractDouble(r.root(), unsigned_big32, &unsigned_big32_double));
+  ASSERT_EQ(kMaxUint32, unsigned_big32_double);
+  ASSERT_TRUE(r.ExtractFloat(r.root(), unsigned_big32, nullptr).IsInvalidArgument());
 
   // Max unsigned 64-bit integer.
   const char* const unsigned_big64 = "unsigned_big64";
@@ -218,6 +238,7 @@ TEST(JsonReaderTest, SignedAndUnsignedInts) {
   ASSERT_OK(r.ExtractUint64(r.root(), unsigned_big64, &unsigned_big64_uint64));
   ASSERT_EQ(kMaxUint64, unsigned_big64_uint64);
   ASSERT_TRUE(r.ExtractDouble(r.root(), unsigned_big64, nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), unsigned_big64, nullptr).IsInvalidArgument());
 
   // Min signed 32-bit integer.
   const char* const signed_small32 = "signed_small32";
@@ -229,8 +250,14 @@ TEST(JsonReaderTest, SignedAndUnsignedInts) {
   ASSERT_EQ(kMinInt32, small32_int64);
   ASSERT_TRUE(r.ExtractUint32(r.root(), signed_small32, nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), signed_small32, nullptr).IsInvalidArgument());
-
-  // Min signed 32-bit integer.
+  double small32_double;
+  ASSERT_OK(r.ExtractDouble(r.root(), signed_small32, &small32_double));
+  ASSERT_EQ(kMinInt32, small32_double);
+  float small32_float;
+  ASSERT_OK(r.ExtractFloat(r.root(), signed_small32, &small32_float));
+  ASSERT_EQ(kMinInt32, small32_float);
+
+  // Min signed 64-bit integer.
   const char* const signed_small64 = "signed_small64";
   ASSERT_TRUE(r.ExtractInt32(r.root(), signed_small64, nullptr).IsInvalidArgument());
   int64_t small64_int64;
@@ -238,6 +265,12 @@ TEST(JsonReaderTest, SignedAndUnsignedInts) {
   ASSERT_EQ(kMinInt64, small64_int64);
   ASSERT_TRUE(r.ExtractUint32(r.root(), signed_small64, nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), signed_small64, nullptr).IsInvalidArgument());
+  double small64_double;
+  ASSERT_OK(r.ExtractDouble(r.root(), signed_small64, &small64_double));
+  ASSERT_EQ(kMinInt64, small64_double);
+  float small64_float;
+  ASSERT_OK(r.ExtractFloat(r.root(), signed_small64, &small64_float));
+  ASSERT_EQ(kMinInt64, small64_float);
 }
 
 TEST(JsonReaderTest, Doubles) {
@@ -259,6 +292,25 @@ TEST(JsonReaderTest, Doubles) {
   ASSERT_TRUE(r.ExtractObjectArray(r.root(), "foo", nullptr).IsInvalidArgument());
 }
 
+TEST(JsonReaderTest, Floats) {
+  JsonReader r("{ \"foo\" : 5.125 }");
+  ASSERT_OK(r.Init());
+
+  float foo;
+  ASSERT_OK(r.ExtractFloat(r.root(), "foo", &foo));
+  ASSERT_EQ(5.125, foo);
+
+  // Bad types.
+  ASSERT_TRUE(r.ExtractBool(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractInt32(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractInt64(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractUint32(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractUint64(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractString(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractObject(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractObjectArray(r.root(), "foo", nullptr).IsInvalidArgument());
+}
+
 TEST(JsonReaderTest, Objects) {
   JsonReader r("{ \"foo\" : { \"1\" : 1 } }");
   ASSERT_OK(r.Init());
@@ -278,6 +330,7 @@ TEST(JsonReaderTest, Objects) {
   ASSERT_TRUE(r.ExtractUint32(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractDouble(r.root(), "foo", nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractString(r.root(), "foo", nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObjectArray(r.root(), "foo", nullptr).IsInvalidArgument());
 }
@@ -302,6 +355,7 @@ TEST(JsonReaderTest, TopLevelArray) {
   ASSERT_TRUE(r.ExtractUint32(r.root(), nullptr, nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractUint64(r.root(), nullptr, nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractDouble(r.root(), nullptr, nullptr).IsInvalidArgument());
+  ASSERT_TRUE(r.ExtractFloat(r.root(), nullptr, nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractString(r.root(), nullptr, nullptr).IsInvalidArgument());
   ASSERT_TRUE(r.ExtractObject(r.root(), nullptr, nullptr).IsInvalidArgument());
 }
diff --git a/src/kudu/util/jsonreader.cc b/src/kudu/util/jsonreader.cc
index 05cde80..d6f233a 100644
--- a/src/kudu/util/jsonreader.cc
+++ b/src/kudu/util/jsonreader.cc
@@ -20,6 +20,7 @@
 #include <ostream>
 #include <utility>
 
+#include <cmath>
 #include <glog/logging.h>
 #include <rapidjson/error/en.h>
 #include <rapidjson/rapidjson.h>
@@ -121,7 +122,8 @@ Status JsonReader::ExtractInt64(const Value* object,
   if (PREDICT_FALSE(!val->IsInt64())) {
     return Status::InvalidArgument(Substitute(
         "wrong type during field extraction: expected int64 but got $0",
-        TypeToString(val->GetType())));  }
+        TypeToString(val->GetType())));
+  }
   *result = val->GetInt64();
   return Status::OK();
 }
@@ -134,7 +136,8 @@ Status JsonReader::ExtractUint64(const Value* object,
   if (PREDICT_FALSE(!val->IsUint64())) {
     return Status::InvalidArgument(Substitute(
         "wrong type during field extraction: expected uint64 but got $0",
-        TypeToString(val->GetType())));  }
+        TypeToString(val->GetType())));
+  }
   *result = val->GetUint64();
   return Status::OK();
 }
@@ -144,14 +147,29 @@ Status JsonReader::ExtractDouble(const Value* object,
                                  double* result) const {
   const Value* val;
   RETURN_NOT_OK(ExtractField(object, field, &val));
-  if (PREDICT_FALSE(!val->IsDouble())) {
+  if (PREDICT_FALSE(!val->IsLosslessDouble())) {
     return Status::InvalidArgument(Substitute(
-        "wrong type during field extraction: expected double but got $0",
-        TypeToString(val->GetType())));  }
+        "wrong type during field extraction: expected a lossless double type but got $0",
+        TypeToString(val->GetType())));
+  }
   *result = val->GetDouble();
   return Status::OK();
 }
 
+Status JsonReader::ExtractFloat(const Value* object,
+                                const char* field,
+                                float* result) const {
+  const Value* val;
+  RETURN_NOT_OK(ExtractField(object, field, &val));
+  if (PREDICT_FALSE(!IsLosslessFloatValue(val))) {
+    return Status::InvalidArgument(Substitute(
+        "wrong type during field extraction: expected a lossless float type but got $0",
+        TypeToString(val->GetType())));
+  }
+  *result = val->GetFloat();
+  return Status::OK();
+}
+
 Status JsonReader::ExtractString(const Value* object,
                                  const char* field,
                                  string* result) const {
@@ -164,7 +182,8 @@ Status JsonReader::ExtractString(const Value* object,
     }
     return Status::InvalidArgument(Substitute(
         "wrong type during field extraction: expected string but got $0",
-        TypeToString(val->GetType())));  }
+        TypeToString(val->GetType())));
+  }
   result->assign(val->GetString());
   return Status::OK();
 }
@@ -177,7 +196,8 @@ Status JsonReader::ExtractObject(const Value* object,
   if (PREDICT_FALSE(!val->IsObject())) {
     return Status::InvalidArgument(Substitute(
         "wrong type during field extraction: expected object but got $0",
-        TypeToString(val->GetType())));  }
+        TypeToString(val->GetType())));
+  }
   *result = val;
   return Status::OK();
 }
@@ -190,7 +210,8 @@ Status JsonReader::ExtractObjectArray(const Value* object,
   if (PREDICT_FALSE(!val->IsArray())) {
     return Status::InvalidArgument(Substitute(
         "wrong type during field extraction: expected object array but got $0",
-        TypeToString(val->GetType())));  }
+        TypeToString(val->GetType())));
+  }
   for (Value::ConstValueIterator iter = val->Begin(); iter != val->End(); ++iter) {
     result->push_back(iter);
   }
@@ -207,4 +228,13 @@ Status JsonReader::ExtractField(const Value* object,
   return Status::OK();
 }
 
+bool JsonReader::IsLosslessFloatValue(const Value* value) const {
+  if (!value->IsLosslessDouble()) {
+    return false;
+  }
+  double a = value->GetDouble();
+  double b = static_cast<double>(static_cast<float>(a));
+  return fabs(a-b) <= 1e-7;
+}
+
 } // namespace kudu
diff --git a/src/kudu/util/jsonreader.h b/src/kudu/util/jsonreader.h
index be2412b..125b762 100644
--- a/src/kudu/util/jsonreader.h
+++ b/src/kudu/util/jsonreader.h
@@ -76,6 +76,10 @@ class JsonReader {
                        const char* field,
                        double* result) const;
 
+  Status ExtractFloat(const rapidjson::Value* object,
+                      const char* field,
+                      float* result) const;
+
   // 'result' is only valid for as long as JsonReader is alive.
   Status ExtractObject(const rapidjson::Value* object,
                        const char* field,
@@ -93,6 +97,8 @@ class JsonReader {
                       const char* field,
                       const rapidjson::Value** result) const;
 
+  bool IsLosslessFloatValue(const rapidjson::Value* value) const;
+
   std::string text_;
   rapidjson::Document document_;
 


[kudu] 01/05: [test] handle ScanTableToStrings() result status

Posted by aw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit ba809891e723c056b7b4f95f180a861536ec60ae
Author: Alexey Serbin <as...@cloudera.com>
AuthorDate: Tue Oct 30 10:27:04 2018 -0700

    [test] handle ScanTableToStrings() result status
    
    Updated tests to check for the result status of the
    kudu::client::ScanTableToStrings() utility function.
    
    Change-Id: I97a68f4c3ba5041848adddbefeb22e64b42a2745
    Reviewed-on: http://gerrit.cloudera.org:8080/11825
    Tested-by: Alexey Serbin <as...@cloudera.com>
    Reviewed-by: Yingchun Lai <40...@qq.com>
---
 src/kudu/client/client-test-util.cc                |  19 ++--
 src/kudu/client/client-test-util.h                 |   9 +-
 src/kudu/client/client-test.cc                     | 116 ++++++++++-----------
 src/kudu/integration-tests/alter_table-test.cc     |   2 +-
 .../integration-tests/flex_partitioning-itest.cc   |   4 +-
 5 files changed, 78 insertions(+), 72 deletions(-)

diff --git a/src/kudu/client/client-test-util.cc b/src/kudu/client/client-test-util.cc
index 678386a..4e65af5 100644
--- a/src/kudu/client/client-test-util.cc
+++ b/src/kudu/client/client-test-util.cc
@@ -17,18 +17,17 @@
 
 #include "kudu/client/client-test-util.h"
 
+#include <algorithm>
 #include <ostream>
 #include <string>
 #include <vector>
 
 #include <glog/logging.h>
-#include <gtest/gtest.h>
 
 #include "kudu/client/scan_batch.h"
 #include "kudu/client/write_op.h"
 #include "kudu/gutil/stl_util.h"
 #include "kudu/util/status.h"
-#include "kudu/util/test_macros.h"
 
 using std::string;
 using std::vector;
@@ -59,19 +58,25 @@ void LogSessionErrorsAndDie(const sp::shared_ptr<KuduSession>& session,
   CHECK_OK(s); // will fail
 }
 
-void ScanTableToStrings(KuduTable* table, vector<string>* row_strings) {
+Status ScanTableToStrings(KuduTable* table,
+                          vector<string>* row_strings,
+                          ScannedRowsOrder rows_order) {
   row_strings->clear();
   KuduScanner scanner(table);
   // TODO(dralves) Change this to READ_AT_SNAPSHOT, fault tolerant scan and get rid
   // of the retry code below.
-  ASSERT_OK(scanner.SetSelection(KuduClient::LEADER_ONLY));
-  ASSERT_OK(scanner.SetTimeoutMillis(15000));
-  ASSERT_OK(ScanToStrings(&scanner, row_strings));
+  RETURN_NOT_OK(scanner.SetSelection(KuduClient::LEADER_ONLY));
+  RETURN_NOT_OK(scanner.SetTimeoutMillis(15000));
+  RETURN_NOT_OK(ScanToStrings(&scanner, row_strings));
+  if (rows_order == ScannedRowsOrder::kSorted) {
+    std::sort(row_strings->begin(), row_strings->end());
+  }
+  return Status::OK();
 }
 
 int64_t CountTableRows(KuduTable* table) {
   vector<string> rows;
-  client::ScanTableToStrings(table, &rows);
+  CHECK_OK(client::ScanTableToStrings(table, &rows));
   return rows.size();
 }
 
diff --git a/src/kudu/client/client-test-util.h b/src/kudu/client/client-test-util.h
index 0d28481..5d8ee11 100644
--- a/src/kudu/client/client-test-util.h
+++ b/src/kudu/client/client-test-util.h
@@ -44,8 +44,15 @@ inline void FlushSessionOrDie(const sp::shared_ptr<KuduSession>& session) {
   }
 }
 
+enum class ScannedRowsOrder {
+  kAsIs,
+  kSorted,
+};
+
 // Scans in LEADER_ONLY mode, returning stringified rows in the given vector.
-void ScanTableToStrings(KuduTable* table, std::vector<std::string>* row_strings);
+Status ScanTableToStrings(KuduTable* table,
+                          std::vector<std::string>* row_strings,
+                          ScannedRowsOrder rows_order = ScannedRowsOrder::kAsIs);
 
 // Count the number of rows in the table in LEADER_ONLY mode.
 int64_t CountTableRows(KuduTable* table);
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index f60bb08..cfc5d59 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -1423,7 +1423,7 @@ TEST_F(ClientTest, TestScanFaultTolerance) {
 
   // Do an initial scan to determine the expected rows for later verification.
   vector<string> expected_rows;
-  ScanTableToStrings(table.get(), &expected_rows);
+  ASSERT_OK(ScanTableToStrings(table.get(), &expected_rows));
 
   // Iterate with no limit and with a lower limit than the expected rows.
   vector<int64_t> limits = { -1, static_cast<int64_t>(expected_rows.size() / 2) };
@@ -2057,7 +2057,7 @@ TEST_F(ClientTest, TestScanWithEncodedRangePredicate) {
   NO_FATALS(InsertTestRows(table.get(), 100));
 
   vector<string> all_rows;
-  NO_FATALS(ScanTableToStrings(table.get(), &all_rows));
+  ASSERT_OK(ScanTableToStrings(table.get(), &all_rows));
   ASSERT_EQ(100, all_rows.size());
 
   unique_ptr<KuduPartialRow> row(table->schema().NewRow());
@@ -2611,12 +2611,10 @@ TEST_F(ClientTest, TestMultipleMultiRowManualBatches) {
 
   // Verify the data looks right.
   vector<string> rows;
-  ScanTableToStrings(client_table_.get(), &rows);
-  std::sort(rows.begin(), rows.end());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows, ScannedRowsOrder::kSorted));
   ASSERT_EQ(kNumRowsPerTablet, rows.size());
   ASSERT_EQ(R"((int32 key=0, int32 int_val=0, string string_val="hello world", )"
-            "int32 non_null_with_default=12345)"
-            , rows[0]);
+            "int32 non_null_with_default=12345)", rows[0]);
 }
 
 // Test a batch where one of the inserted rows succeeds while another fails.
@@ -2648,9 +2646,8 @@ TEST_F(ClientTest, TestBatchWithPartialErrorOfDuplicateKeys) {
 
   // Verify that the other row was successfully inserted
   vector<string> rows;
-  NO_FATALS(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows, ScannedRowsOrder::kSorted));
   ASSERT_EQ(2, rows.size());
-  std::sort(rows.begin(), rows.end());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=1, string string_val="original row", )"
             "int32 non_null_with_default=12345)", rows[0]);
   ASSERT_EQ(R"((int32 key=2, int32 int_val=1, string string_val="Should succeed", )"
@@ -2688,9 +2685,8 @@ TEST_F(ClientTest, TestBatchWithPartialErrorOfMissingRequiredColumn) {
 
   // Verify that the other row was successfully inserted
   vector<string> rows;
-  NO_FATALS(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(1, rows.size());
-  std::sort(rows.begin(), rows.end());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=1, string string_val="Should succeed", )"
             "int32 non_null_with_default=1)", rows[0]);
 }
@@ -2725,9 +2721,8 @@ TEST_F(ClientTest, TestBatchWithPartialErrorOfNoFieldsUpdated) {
 
   // Verify that the other row was successfully updated.
   vector<string> rows;
-  NO_FATALS(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows, ScannedRowsOrder::kSorted));
   ASSERT_EQ(2, rows.size());
-  std::sort(rows.begin(), rows.end());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=1, string string_val="One", )"
             "int32 non_null_with_default=12345)", rows[0]);
   ASSERT_EQ(R"((int32 key=2, int32 int_val=22, string string_val="Two", )"
@@ -2763,9 +2758,8 @@ TEST_F(ClientTest, TestBatchWithPartialErrorOfNonKeyColumnSpecifiedDelete) {
 
   // Verify that the other row was successfully deleted.
   vector<string> rows;
-  NO_FATALS(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(1, rows.size());
-  std::sort(rows.begin(), rows.end());
   ASSERT_EQ(R"((int32 key=2, int32 int_val=2, string string_val="Two", )"
             "int32 non_null_with_default=12345)", rows[0]);
 }
@@ -2807,9 +2801,8 @@ TEST_F(ClientTest, TestBatchWithPartialErrorOfAllRowsFailed) {
 
   // Verify that no row was deleted.
   vector<string> rows;
-  NO_FATALS(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows, ScannedRowsOrder::kSorted));
   ASSERT_EQ(2, rows.size());
-  std::sort(rows.begin(), rows.end());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=1, string string_val="One", )"
             "int32 non_null_with_default=12345)", rows[0]);
   ASSERT_EQ(R"((int32 key=2, int32 int_val=2, string string_val="Two", )"
@@ -2875,8 +2868,8 @@ void ClientTest::DoApplyWithoutFlushTest(int sleep_micros) {
 
   // Should have no rows.
   vector<string> rows;
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 }
 
 
@@ -3330,8 +3323,8 @@ TEST_F(ClientTest, TestAutoFlushBackgroundDropSession) {
   // should notice that and do not perform flush, so no data is supposed
   // to appear in the table.
   vector<string> rows;
-  ScanTableToStrings(client_table_.get(), &rows);
-  EXPECT_TRUE(rows.empty());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 }
 
 // A test scenario for AUTO_FLUSH_BACKGROUND mode:
@@ -3617,7 +3610,7 @@ TEST_F(ClientTest, TestMutationsWork) {
   ASSERT_OK(ApplyUpdateToSession(session.get(), client_table_, 1, 2));
   FlushSessionOrDie(session);
   vector<string> rows;
-  ScanTableToStrings(client_table_.get(), &rows);
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(1, rows.size());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=2, string string_val="original row", )"
             "int32 non_null_with_default=12345)", rows[0]);
@@ -3625,8 +3618,8 @@ TEST_F(ClientTest, TestMutationsWork) {
 
   ASSERT_OK(ApplyDeleteToSession(session.get(), client_table_, 1));
   FlushSessionOrDie(session);
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 }
 
 TEST_F(ClientTest, TestMutateDeletedRow) {
@@ -3637,8 +3630,8 @@ TEST_F(ClientTest, TestMutateDeletedRow) {
   FlushSessionOrDie(session);
   ASSERT_OK(ApplyDeleteToSession(session.get(), client_table_, 1));
   FlushSessionOrDie(session);
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 
   // Attempt update deleted row
   ASSERT_OK(ApplyUpdateToSession(session.get(), client_table_, 1, 2));
@@ -3649,8 +3642,8 @@ TEST_F(ClientTest, TestMutateDeletedRow) {
   unique_ptr<KuduError> error = GetSingleErrorFromSession(session.get());
   ASSERT_EQ(error->failed_op().ToString(),
             "UPDATE int32 key=1, int32 int_val=2");
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 
   // Attempt delete deleted row
   ASSERT_OK(ApplyDeleteToSession(session.get(), client_table_, 1));
@@ -3661,8 +3654,8 @@ TEST_F(ClientTest, TestMutateDeletedRow) {
   error = GetSingleErrorFromSession(session.get());
   ASSERT_EQ(error->failed_op().ToString(),
             "DELETE int32 key=1");
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 }
 
 TEST_F(ClientTest, TestMutateNonexistentRow) {
@@ -3679,8 +3672,8 @@ TEST_F(ClientTest, TestMutateNonexistentRow) {
   unique_ptr<KuduError> error = GetSingleErrorFromSession(session.get());
   ASSERT_EQ(error->failed_op().ToString(),
             "UPDATE int32 key=1, int32 int_val=2");
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 
   // Attempt delete nonexistent row
   ASSERT_OK(ApplyDeleteToSession(session.get(), client_table_, 1));
@@ -3691,8 +3684,8 @@ TEST_F(ClientTest, TestMutateNonexistentRow) {
   error = GetSingleErrorFromSession(session.get());
   ASSERT_EQ(error->failed_op().ToString(),
             "DELETE int32 key=1");
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 }
 
 TEST_F(ClientTest, TestUpsert) {
@@ -3705,10 +3698,10 @@ TEST_F(ClientTest, TestUpsert) {
 
   {
     vector<string> rows;
-    ScanTableToStrings(client_table_.get(), &rows);
-    EXPECT_EQ(vector<string>({R"((int32 key=1, int32 int_val=1, string string_val="original row", )"
-              "int32 non_null_with_default=12345)"}),
-      rows);
+    ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+    ASSERT_EQ(1, rows.size());
+    EXPECT_EQ(R"((int32 key=1, int32 int_val=1, string string_val="original row", )"
+              "int32 non_null_with_default=12345)", rows[0]);
   }
 
   // Perform and verify UPSERT which acts as an UPDATE.
@@ -3717,10 +3710,10 @@ TEST_F(ClientTest, TestUpsert) {
 
   {
     vector<string> rows;
-    ScanTableToStrings(client_table_.get(), &rows);
-    EXPECT_EQ(vector<string>({R"((int32 key=1, int32 int_val=2, string string_val="upserted row", )"
-              "int32 non_null_with_default=12345)"}),
-        rows);
+    ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+    ASSERT_EQ(1, rows.size());
+    EXPECT_EQ(R"((int32 key=1, int32 int_val=2, string string_val="upserted row", )"
+              "int32 non_null_with_default=12345)", rows[0]);
   }
 
   // Apply an UPDATE including the column that has a default and verify it.
@@ -3735,10 +3728,10 @@ TEST_F(ClientTest, TestUpsert) {
   }
   {
     vector<string> rows;
-    ScanTableToStrings(client_table_.get(), &rows);
-    EXPECT_EQ(vector<string>({R"((int32 key=1, int32 int_val=2, string string_val="updated row", )"
-              "int32 non_null_with_default=999)"}),
-        rows);
+    ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+    ASSERT_EQ(1, rows.size());
+    EXPECT_EQ(R"((int32 key=1, int32 int_val=2, string string_val="updated row", )"
+              "int32 non_null_with_default=999)", rows[0]);
   }
 
   // Perform another UPSERT. This upsert doesn't specify the 'non_null_with_default'
@@ -3747,11 +3740,10 @@ TEST_F(ClientTest, TestUpsert) {
   FlushSessionOrDie(session);
   {
     vector<string> rows;
-    ScanTableToStrings(client_table_.get(), &rows);
-    EXPECT_EQ(vector<string>({
-          R"((int32 key=1, int32 int_val=3, string string_val="upserted row 2", )"
-          "int32 non_null_with_default=999)"}),
-        rows);
+    ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+    ASSERT_EQ(1, rows.size());
+    EXPECT_EQ(R"((int32 key=1, int32 int_val=3, string string_val="upserted row 2", )"
+              "int32 non_null_with_default=999)", rows[0]);
   }
 
   // Delete the row.
@@ -3759,8 +3751,8 @@ TEST_F(ClientTest, TestUpsert) {
   FlushSessionOrDie(session);
   {
     vector<string> rows;
-    ScanTableToStrings(client_table_.get(), &rows);
-    EXPECT_EQ(vector<string>({}), rows);
+    ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+    EXPECT_TRUE(rows.empty());
   }
 }
 
@@ -4118,7 +4110,7 @@ TEST_F(ClientTest, TestDeleteTable) {
   // Insert a few rows, and scan them back. This is to populate the MetaCache.
   NO_FATALS(InsertTestRows(client_.get(), client_table_.get(), 10));
   vector<string> rows;
-  ScanTableToStrings(client_table_.get(), &rows);
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(10, rows.size());
 
   // Remove the table
@@ -4411,7 +4403,7 @@ TEST_F(ClientTest, TestSeveralRowMutatesPerBatch) {
   ASSERT_OK(ApplyUpdateToSession(session.get(), client_table_, 1, 2));
   FlushSessionOrDie(session);
   vector<string> rows;
-  ScanTableToStrings(client_table_.get(), &rows);
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(1, rows.size());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=2, string string_val="", )"
             "int32 non_null_with_default=12345)", rows[0]);
@@ -4423,7 +4415,7 @@ TEST_F(ClientTest, TestSeveralRowMutatesPerBatch) {
   ASSERT_OK(ApplyInsertToSession(session.get(), client_table_, 2, 1, ""));
   ASSERT_OK(ApplyDeleteToSession(session.get(), client_table_, 2));
   FlushSessionOrDie(session);
-  ScanTableToStrings(client_table_.get(), &rows);
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(1, rows.size());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=2, string string_val="", )"
             "int32 non_null_with_default=12345)", rows[0]);
@@ -4434,14 +4426,14 @@ TEST_F(ClientTest, TestSeveralRowMutatesPerBatch) {
   ASSERT_OK(ApplyUpdateToSession(session.get(), client_table_, 1, 1));
   ASSERT_OK(ApplyDeleteToSession(session.get(), client_table_, 1));
   FlushSessionOrDie(session);
-  ScanTableToStrings(client_table_.get(), &rows);
-  ASSERT_EQ(0, rows.size());
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
+  ASSERT_TRUE(rows.empty());
 
   // Test delete/insert (insert a row first)
   LOG(INFO) << "Inserting row for delete/insert test, key " << 1 << ".";
   ASSERT_OK(ApplyInsertToSession(session.get(), client_table_, 1, 1, ""));
   FlushSessionOrDie(session);
-  ScanTableToStrings(client_table_.get(), &rows);
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(1, rows.size());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=1, string string_val="", )"
             "int32 non_null_with_default=12345)", rows[0]);
@@ -4450,7 +4442,7 @@ TEST_F(ClientTest, TestSeveralRowMutatesPerBatch) {
   ASSERT_OK(ApplyDeleteToSession(session.get(), client_table_, 1));
   ASSERT_OK(ApplyInsertToSession(session.get(), client_table_, 1, 2, ""));
   FlushSessionOrDie(session);
-  ScanTableToStrings(client_table_.get(), &rows);
+  ASSERT_OK(ScanTableToStrings(client_table_.get(), &rows));
   ASSERT_EQ(1, rows.size());
   ASSERT_EQ(R"((int32 key=1, int32 int_val=2, string string_val="", )"
             "int32 non_null_with_default=12345)", rows[0]);
@@ -4863,8 +4855,10 @@ TEST_F(ClientTest, TestInsertEmptyPK) {
   // Utility function to get the current value of the row.
   const auto ReadRowAsString = [&]() {
     vector<string> rows;
-    ScanTableToStrings(table.get(), &rows);
-    if (rows.empty()) return string("<none>");
+    CHECK_OK(ScanTableToStrings(table.get(), &rows));
+    if (rows.empty()) {
+      return string("<none>");
+    }
     CHECK_EQ(1, rows.size());
     return rows[0];
   };
diff --git a/src/kudu/integration-tests/alter_table-test.cc b/src/kudu/integration-tests/alter_table-test.cc
index a80b5a9..8cab86c 100644
--- a/src/kudu/integration-tests/alter_table-test.cc
+++ b/src/kudu/integration-tests/alter_table-test.cc
@@ -706,7 +706,7 @@ void AlterTableTest::UpdateRow(int32_t row_key,
 void AlterTableTest::ScanToStrings(vector<string>* rows) {
   shared_ptr<KuduTable> table;
   CHECK_OK(client_->OpenTable(kTableName, &table));
-  ScanTableToStrings(table.get(), rows);
+  CHECK_OK(ScanTableToStrings(table.get(), rows));
   std::sort(rows->begin(), rows->end());
 }
 
diff --git a/src/kudu/integration-tests/flex_partitioning-itest.cc b/src/kudu/integration-tests/flex_partitioning-itest.cc
index edef835..cfa7c57 100644
--- a/src/kudu/integration-tests/flex_partitioning-itest.cc
+++ b/src/kudu/integration-tests/flex_partitioning-itest.cc
@@ -505,9 +505,9 @@ void FlexPartitioningITest::InsertAndVerifyScans(const RangePartitionOptions& ra
   // First, ensure that we get back the same number we put in.
   {
     vector<string> rows;
-    ScanTableToStrings(table_.get(), &rows);
-    std::sort(rows.begin(), rows.end());
+    ASSERT_OK(ScanTableToStrings(table_.get(), &rows));
     ASSERT_EQ(row_count, rows.size());
+    std::sort(rows.begin(), rows.end());
   }
 
   // Perform some scans with predicates.


[kudu] 02/05: [docs]: Fix bad markdown formatting in tablet.md

Posted by aw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8124d94825e3573a95d87d48d2c3326eb499cab3
Author: lingbin <li...@gmail.com>
AuthorDate: Thu Aug 22 16:17:46 2019 +0800

    [docs]: Fix bad markdown formatting in tablet.md
    
    Use '```' for code block
    
    Change-Id: I06e04031c8b817b3a3d7f771705c85a161cbb08d
    Reviewed-on: http://gerrit.cloudera.org:8080/14120
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 docs/design-docs/tablet.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/design-docs/tablet.md b/docs/design-docs/tablet.md
index d4656be..fb2d0fe 100644
--- a/docs/design-docs/tablet.md
+++ b/docs/design-docs/tablet.md
@@ -135,12 +135,12 @@ Note that "mutation" in this case can be one of three types:
 
 As a concrete example, consider the following sequence on a table with schema
 (key STRING, val UINT32):
-
+```
   INSERT INTO t VALUES ("row", 1);         [timestamp 1]
   UPDATE t SET val = 2 WHERE key = "row";  [timestamp 2]
   DELETE FROM t WHERE key = "row";         [timestamp 3]
   INSERT INTO t VALUES ("row", 3);         [timestamp 4]
-
+```
 This would result in the following structure in the MemRowSet:
 ```
   +-----------------------------------+


[kudu] 03/05: KUDU-2847: Optimize iteration over selection vector in SerializeRowBlock

Posted by aw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 7116a0841ca4dcd62d3e6338cfbd6672df86770a
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Tue Jul 23 22:52:51 2019 -0700

    KUDU-2847: Optimize iteration over selection vector in SerializeRowBlock
    
    This improves the performance of serializing RowBlocks to the wire by
    amortizing the cost of iterating over the set bits of the selection
    bitmap. Instead of using the bitmap once per column, we convert the
    bitmap to a list of set indices up front, and then use those indices for
    conversion.
    
    This changes the benchmarks to report cycles/cell instead of raw times,
    making it easier to see the effects of column count or sparsity.
    
    Benchmark results:
      column count 3 and row select rate 1:     5.12520529   ->  5.44280228 cycles/cell
      column count 3 and row select rate 0.8:   12.74473127  ->  7.04588262 cycles/cell
      column count 3 and row select rate 0.5:   23.98607461  ->  7.51201477 cycles/cell
      column count 3 and row select rate 0.2:   40.66053179  ->  8.30233998 cycles/cell
      column count 30 and row select rate 1:    15.43040511  ->  15.97765642 cycles/cell
      column count 30 and row select rate 0.8:  23.7480557   ->  17.84433817 cycles/cell
      column count 30 and row select rate 0.5:  40.08323337  ->  17.67888749 cycles/cell
      column count 30 and row select rate 0.2:  48.62210244  ->  16.56884988 cycles/cell
      column count 300 and row select rate 1:   18.9223316   ->  20.90426976 cycles/cell
      column count 300 and row select rate 0.8: 27.50793008  ->  21.92481189 cycles/cell
      column count 300 and row select rate 0.5: 40.34367716  ->  21.32180024 cycles/cell
      column count 300 and row select rate 0.2: 52.7446843   ->  20.92634437 cycles/cell
    
    Patch co-authored by Zhang Yao.
    
    Change-Id: I19917d1875c46fd4cf98ef8a471b0340a76161e7
    Reviewed-on: http://gerrit.cloudera.org:8080/13721
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/common/rowblock-test.cc      | 21 +++++++++
 src/kudu/common/rowblock.cc           | 30 +++++++++++++
 src/kudu/common/rowblock.h            |  4 ++
 src/kudu/common/wire_protocol-test.cc | 27 +++++++-----
 src/kudu/common/wire_protocol.cc      | 80 ++++++++++++++++-------------------
 5 files changed, 108 insertions(+), 54 deletions(-)

diff --git a/src/kudu/common/rowblock-test.cc b/src/kudu/common/rowblock-test.cc
index 1dd85b8..c2d5a84 100644
--- a/src/kudu/common/rowblock-test.cc
+++ b/src/kudu/common/rowblock-test.cc
@@ -18,9 +18,12 @@
 #include "kudu/common/rowblock.h"
 
 #include <cstddef>
+#include <vector>
 
 #include <gtest/gtest.h>
 
+using std::vector;
+
 namespace kudu {
 
 TEST(TestSelectionVector, TestEquals) {
@@ -56,11 +59,29 @@ TEST(TestSelectionVector, TestNonByteAligned) {
   ASSERT_EQ(sv.nrows(), sv.CountSelected());
   ASSERT_TRUE(sv.AnySelected());
 
+  vector<int> sel;
+  sv.GetSelectedRows(&sel);
+  ASSERT_EQ(sv.nrows(), sel.size());
+
   for (size_t i = 0; i < sv.nrows(); i++) {
     sv.SetRowUnselected(i);
   }
   ASSERT_EQ(0, sv.CountSelected());
   ASSERT_FALSE(sv.AnySelected());
+  sv.GetSelectedRows(&sel);
+  ASSERT_EQ(0, sel.size());
+}
+
+TEST(TestSelectionVector, TestGetSelectedRows) {
+  vector<int> expected = {1, 4, 9, 10, 18};
+  SelectionVector sv(20);
+  sv.SetAllFalse();
+  for (int i : expected) {
+    sv.SetRowSelected(i);
+  }
+  vector<int> selected;
+  sv.GetSelectedRows(&selected);
+  ASSERT_EQ(expected, selected);
 }
 
 } // namespace kudu
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 4996973..f89b379 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -16,12 +16,17 @@
 // under the License.
 #include "kudu/common/rowblock.h"
 
+#include <numeric>
+#include <vector>
+
 #include <glog/logging.h>
 
 #include "kudu/gutil/bits.h"
 #include "kudu/gutil/port.h"
 #include "kudu/util/bitmap.h"
 
+using std::vector;
+
 namespace kudu {
 
 SelectionVector::SelectionVector(size_t row_capacity)
@@ -69,6 +74,31 @@ void SelectionVector::ClearToSelectAtMost(size_t max_rows) {
   }
 }
 
+void SelectionVector::GetSelectedRows(vector<int>* selected) const {
+  int n_selected = CountSelected();
+  selected->resize(n_selected);
+  if (n_selected == 0) {
+    return;
+  }
+  if (n_selected == n_rows_) {
+    std::iota(selected->begin(), selected->end(), 0);
+    return;
+  }
+
+  const uint8_t* bitmap = &bitmap_[0];
+  int* dst = selected->data();
+  // Within each byte, keep flipping the least significant non-zero bit and adding
+  // the bit index to the output until none are set.
+  for (int i = 0; i < n_bytes_; i++) {
+    uint8_t bm = *bitmap++;
+    while (bm != 0) {
+      int bit = Bits::FindLSBSetNonZero(bm);
+      *dst++ = (i * 8) + bit;
+      bm ^= (1 << bit);
+    }
+  }
+}
+
 size_t SelectionVector::CountSelected() const {
   return Bits::Count(&bitmap_[0], n_bytes_);
 }
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index 678b355..d65ee4e 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -103,6 +103,10 @@ class SelectionVector {
     return BitmapFindFirstSet(&bitmap_[0], row_offset, n_rows_, row);
   }
 
+  // Sets '*selected' to the indices of all rows marked as selected
+  // in this selection vector.
+  void GetSelectedRows(std::vector<int>* selected) const;
+
   uint8_t *mutable_bitmap() {
     return &bitmap_[0];
   }
diff --git a/src/kudu/common/wire_protocol-test.cc b/src/kudu/common/wire_protocol-test.cc
index 49dc500..cb5294e 100644
--- a/src/kudu/common/wire_protocol-test.cc
+++ b/src/kudu/common/wire_protocol-test.cc
@@ -39,6 +39,7 @@
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/walltime.h"
 #include "kudu/util/bitmap.h"
 #include "kudu/util/bloom_filter.h"
 #include "kudu/util/faststring.h"
@@ -151,22 +152,28 @@ class WireProtocolTest : public KuduTest,
   void RunBenchmark(int column_count, double select_rate) {
     ResetBenchmarkSchema(column_count);
     Arena arena(1024);
-    const int kNumTrials = AllowSlowTests() ? 100 : 10;
-    RowBlock block(&benchmark_schema_, 10000, &arena);
+    RowBlock block(&benchmark_schema_, 1000, &arena);
+    // Regardless of the config, use a constant number of cells for the test by
+    // looping the conversion an appropriate number of times.
+    const int64_t kNumCellsToConvert = AllowSlowTests() ? 100000000 : 1000000;
+    const int kNumTrials = kNumCellsToConvert / select_rate / column_count / block.nrows();
     FillRowBlockForBenchmark(&block);
     SelectRandomRowsWithRate(&block, select_rate);
 
     RowwiseRowBlockPB pb;
     faststring direct, indirect;
-    LOG_TIMING(INFO, Substitute("Converting to PB with column count $0 and row select rate $1 ",
-                                column_count, select_rate)) {
-      for (int i = 0; i < kNumTrials; ++i) {
-        pb.Clear();
-        direct.clear();
-        indirect.clear();
-        SerializeRowBlock(block, &pb, nullptr, &direct, &indirect);
-      }
+    int64_t cycle_start = CycleClock::Now();
+    for (int i = 0; i < kNumTrials; ++i) {
+      pb.Clear();
+      direct.clear();
+      indirect.clear();
+      SerializeRowBlock(block, &pb, nullptr, &direct, &indirect);
     }
+    int64_t cycle_end = CycleClock::Now();
+    LOG(INFO) << Substitute(
+        "Converting to PB with column count $0 and row select rate $1: $2 cycles/cell",
+        column_count, select_rate,
+        static_cast<double>(cycle_end - cycle_start) / kNumCellsToConvert);
   }
  protected:
   Schema schema_;
diff --git a/src/kudu/common/wire_protocol.cc b/src/kudu/common/wire_protocol.cc
index 9350231..82e67cd 100644
--- a/src/kudu/common/wire_protocol.cc
+++ b/src/kudu/common/wire_protocol.cc
@@ -907,51 +907,40 @@ void AppendRowToString<RowBlockRow>(const RowBlockRow& row, string* buf) {
 // be copied to column 'dst_col_idx' in the output protobuf; otherwise,
 // dst_col_idx must be equal to col_idx.
 template<bool IS_NULLABLE, bool IS_VARLEN>
-static void CopyColumn(const RowBlock& block, int col_idx, int dst_col_idx, uint8_t* dst_base,
-                       faststring* indirect_data, const Schema* dst_schema, size_t row_stride,
-                       size_t schema_byte_size, size_t column_offset) {
+static void CopyColumn(
+    const ColumnBlock& column_block, int dst_col_idx, uint8_t* __restrict__ dst_base,
+    faststring* indirect_data, const Schema* dst_schema, size_t row_stride,
+    size_t schema_byte_size, size_t column_offset,
+    const vector<int>& row_idx_select) {
   DCHECK(dst_schema);
-  ColumnBlock column_block = block.column_block(col_idx);
   uint8_t* dst = dst_base + column_offset;
   size_t offset_to_null_bitmap = schema_byte_size - column_offset;
 
   size_t cell_size = column_block.stride();
   const uint8_t* src = column_block.cell_ptr(0);
 
-  BitmapIterator selected_row_iter(block.selection_vector()->bitmap(), block.nrows());
-  int run_size;
-  bool selected;
-  int row_idx = 0;
-  while ((run_size = selected_row_iter.Next(&selected))) {
-    if (!selected) {
-      src += run_size * cell_size;
-      row_idx += run_size;
-      continue;
-    }
-    for (int i = 0; i < run_size; i++) {
-      if (IS_NULLABLE && column_block.is_null(row_idx)) {
-        BitmapChange(dst + offset_to_null_bitmap, dst_col_idx, true);
-      } else if (IS_VARLEN) {
-        const Slice *slice = reinterpret_cast<const Slice *>(src);
-        size_t offset_in_indirect = indirect_data->size();
-        indirect_data->append(reinterpret_cast<const char*>(slice->data()), slice->size());
-
-        Slice *dst_slice = reinterpret_cast<Slice *>(dst);
-        *dst_slice = Slice(reinterpret_cast<const uint8_t*>(offset_in_indirect),
-                           slice->size());
-        if (IS_NULLABLE) {
-          BitmapChange(dst + offset_to_null_bitmap, dst_col_idx, false);
-        }
-      } else { // non-string, non-null
-        strings::memcpy_inlined(dst, src, cell_size);
-        if (IS_NULLABLE) {
-          BitmapChange(dst + offset_to_null_bitmap, dst_col_idx, false);
-        }
+  for (auto index : row_idx_select) {
+    src = column_block.cell_ptr(index);
+    if (IS_NULLABLE && column_block.is_null(index)) {
+      BitmapChange(dst + offset_to_null_bitmap, dst_col_idx, true);
+    } else if (IS_VARLEN) {
+      const Slice* slice = reinterpret_cast<const Slice *>(src);
+      size_t offset_in_indirect = indirect_data->size();
+      indirect_data->append(reinterpret_cast<const char*>(slice->data()), slice->size());
+
+      Slice* dst_slice = reinterpret_cast<Slice *>(dst);
+      *dst_slice = Slice(reinterpret_cast<const uint8_t*>(offset_in_indirect),
+                         slice->size());
+      if (IS_NULLABLE) {
+        BitmapChange(dst + offset_to_null_bitmap, dst_col_idx, false);
+      }
+    } else { // non-string, non-null
+      strings::memcpy_inlined(dst, src, cell_size);
+      if (IS_NULLABLE) {
+        BitmapChange(dst + offset_to_null_bitmap, dst_col_idx, false);
       }
-      dst += row_stride;
-      src += cell_size;
-      row_idx++;
     }
+    dst += row_stride;
   }
 }
 
@@ -1002,6 +991,8 @@ void SerializeRowBlock(const RowBlock& block,
     memset(base, 0, additional_size);
   }
 
+  vector<int> selected_row_indexes;
+  block.selection_vector()->GetSelectedRows(&selected_row_indexes);
   size_t t_schema_idx = 0;
   size_t padding_so_far = 0;
   for (int p_schema_idx = 0; p_schema_idx < projection_schema->num_columns(); p_schema_idx++) {
@@ -1010,6 +1001,7 @@ void SerializeRowBlock(const RowBlock& block,
     DCHECK_NE(t_schema_idx, -1);
 
     size_t column_offset = projection_schema->column_offset(p_schema_idx) + padding_so_far;
+    const ColumnBlock& column_block = block.column_block(t_schema_idx);
 
     // Generating different functions for each of these cases makes them much less
     // branch-heavy -- we do the branch once outside the loop, and then have a
@@ -1018,17 +1010,17 @@ void SerializeRowBlock(const RowBlock& block,
     // even bigger gains, since we could inline the constant cell sizes and column
     // offsets.
     if (col.is_nullable() && col.type_info()->physical_type() == BINARY) {
-      CopyColumn<true, true>(block, t_schema_idx, p_schema_idx, base, indirect_data,
-                             projection_schema, row_stride, schema_byte_size, column_offset);
+      CopyColumn<true, true>(column_block, p_schema_idx, base, indirect_data, projection_schema,
+                             row_stride, schema_byte_size, column_offset, selected_row_indexes);
     } else if (col.is_nullable() && col.type_info()->physical_type() != BINARY) {
-      CopyColumn<true, false>(block, t_schema_idx, p_schema_idx, base, indirect_data,
-                              projection_schema, row_stride, schema_byte_size, column_offset);
+      CopyColumn<true, false>(column_block, p_schema_idx, base, indirect_data, projection_schema,
+                              row_stride, schema_byte_size, column_offset, selected_row_indexes);
     } else if (!col.is_nullable() && col.type_info()->physical_type() == BINARY) {
-      CopyColumn<false, true>(block, t_schema_idx, p_schema_idx, base, indirect_data,
-                              projection_schema, row_stride, schema_byte_size, column_offset);
+      CopyColumn<false, true>(column_block, p_schema_idx, base, indirect_data, projection_schema,
+                              row_stride, schema_byte_size, column_offset, selected_row_indexes);
     } else if (!col.is_nullable() && col.type_info()->physical_type() != BINARY) {
-      CopyColumn<false, false>(block, t_schema_idx, p_schema_idx, base, indirect_data,
-                               projection_schema, row_stride, schema_byte_size, column_offset);
+      CopyColumn<false, false>(column_block, p_schema_idx, base, indirect_data, projection_schema,
+                               row_stride, schema_byte_size, column_offset, selected_row_indexes);
     } else {
       LOG(FATAL) << "cannot reach here";
     }


[kudu] 05/05: test: deflake ksck_remote-test TestChecksumSnapshotCurrentTimestamp

Posted by aw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8af4883be0d49d4fe69ef697345d1feee799c935
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Fri Jul 19 19:11:40 2019 -0700

    test: deflake ksck_remote-test TestChecksumSnapshotCurrentTimestamp
    
    I saw a failure of the test with the following error:
    
    W0720 00:44:31.890009  4561 tablet_service.cc:2357] Rejecting scan request for tablet 9542a9eebae84c1b993235a309c866d0: Uninitialized: safe time has not yet been initialized
    /data0/jenkins/workspace/kudu-pre-commit-unittest-ASAN/src/kudu/tools/ksck_remote-test.cc:451: Failure
    Failed
    Bad status: Aborted: 1 errors were detected
    
    This patch wraps the checksum with an ASSERT_EVENTUALLY, since the scan
    should eventually pass once a leader is elected or writes succeed.
    
    Change-Id: I51aeaebca599697e444b244ae15259dea4dbe949
    Reviewed-on: http://gerrit.cloudera.org:8080/13887
    Tested-by: Kudu Jenkins
    Reviewed-by: Hao Hao <ha...@cloudera.com>
---
 src/kudu/tools/ksck_remote-test.cc | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/kudu/tools/ksck_remote-test.cc b/src/kudu/tools/ksck_remote-test.cc
index ba4328a..a64e268 100644
--- a/src/kudu/tools/ksck_remote-test.cc
+++ b/src/kudu/tools/ksck_remote-test.cc
@@ -445,10 +445,14 @@ TEST_F(RemoteKsckTest, TestChecksumSnapshotCurrentTimestamp) {
     ASSERT_OK(ksck_->CheckClusterRunning());
     ASSERT_OK(ksck_->FetchTableAndTabletInfo());
     ASSERT_OK(ksck_->FetchInfoFromTabletServers());
-    ASSERT_OK(ksck_->ChecksumData(KsckChecksumOptions(MonoDelta::FromSeconds(10),
-                                                      MonoDelta::FromSeconds(10),
-                                                      16, true,
-                                                      KsckChecksumOptions::kCurrentTimestamp)));
+    // It's possible for scans to fail because the tablets haven't been written
+    // to yet and haven't elected a leader.
+    ASSERT_EVENTUALLY([&] {
+      ASSERT_OK(ksck_->ChecksumData(KsckChecksumOptions(MonoDelta::FromSeconds(10),
+                                                        MonoDelta::FromSeconds(10),
+                                                        16, true,
+                                                        KsckChecksumOptions::kCurrentTimestamp)));
+    });
   }
   ASSERT_OK(promise.Get());
 }