You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2020/04/04 21:34:11 UTC

[kudu] branch master updated (882ae58 -> e37b340)

This is an automated email from the ASF dual-hosted git repository.

awong pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 882ae58  bitshuffle: stop using uninitialized data as padding
     new c0a9fdc  ranger: enable log4j2 logging to files
     new e37b340  scripts: fix benchmark parsing for wire_protocol-test

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 java/kudu-subprocess/build.gradle       |   2 +
 src/kudu/ranger/mini_ranger.cc          |  12 +-
 src/kudu/ranger/mini_ranger.h           |   4 +-
 src/kudu/ranger/ranger_client-test.cc   |  68 +++++++++++
 src/kudu/ranger/ranger_client.cc        | 198 +++++++++++++++++++++++---------
 src/kudu/scripts/benchmarks.sh          |  26 +++--
 src/kudu/subprocess/CMakeLists.txt      |   1 +
 src/kudu/subprocess/subprocess_proxy.cc | 128 +++++++++++++++++++++
 src/kudu/subprocess/subprocess_proxy.h  |  23 +++-
 src/kudu/util/env.h                     |   3 +
 10 files changed, 390 insertions(+), 75 deletions(-)
 create mode 100644 src/kudu/subprocess/subprocess_proxy.cc


[kudu] 01/02: ranger: enable log4j2 logging to files

Posted by aw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit c0a9fdc7384ac24258411949303ae1881358c3f3
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Fri Apr 3 00:38:52 2020 -0700

    ranger: enable log4j2 logging to files
    
    This patch enables Kudu to generate a log4j2 properties file and pass it
    to the Ranger client. This file is created in a directory specified by
    the new --ranger_logging_config_dir flag, which, by default, will direct
    the properties file to --log_dir. If the properties file already exists,
    the existing file is honored. This means a file will not be regenerated
    if one has already been generated -- I will add a follow up patch to
    allow users to overwrite the file.
    
    This properties file will direct logs to the directory pointed to by
    --log_dir, and will honor the existing --max_log_files and
    --max_log_size flags.
    
    I've found logging to stdout and adjusting log42j log-level to be
    useful, so this also adds --ranger_logtostdout and --ranger_log_level
    for these respectively. I opted not to use the glog --minloglevel flag
    since they don't quite map to log4j2 log levels (e.g. glog doesn't have
    debug-level logging).
    
    I manually tested that an existing properties file that has been
    modified will be honored. Here's what's in a Ranger-configured Kudu
    master's log directory:
     kudu-master.INFO -> kudu-master.awong-MBP16-21621.local.awong.log.INFO.20200403-122527.17165
     kudu-master.awong-MBP16-21621.local.awong.diagnostics.20200403-122527.0.17165
     kudu-master.awong-MBP16-21621.local.awong.log.INFO.20200403-122527.17165
     kudu-ranger-subprocess-log4j2.properties
     kudu-ranger-subprocess.awong-MBP16-21621.local.log
    
    Change-Id: I7efa631832c219fce214304538e6ab6442062752
    Reviewed-on: http://gerrit.cloudera.org:8080/15628
    Reviewed-by: Grant Henke <gr...@apache.org>
    Tested-by: Andrew Wong <aw...@cloudera.com>
---
 java/kudu-subprocess/build.gradle       |   2 +
 src/kudu/ranger/mini_ranger.cc          |  12 +-
 src/kudu/ranger/mini_ranger.h           |   4 +-
 src/kudu/ranger/ranger_client-test.cc   |  68 +++++++++++
 src/kudu/ranger/ranger_client.cc        | 198 +++++++++++++++++++++++---------
 src/kudu/subprocess/CMakeLists.txt      |   1 +
 src/kudu/subprocess/subprocess_proxy.cc | 128 +++++++++++++++++++++
 src/kudu/subprocess/subprocess_proxy.h  |  23 +++-
 src/kudu/util/env.h                     |   3 +
 9 files changed, 374 insertions(+), 65 deletions(-)

diff --git a/java/kudu-subprocess/build.gradle b/java/kudu-subprocess/build.gradle
index eafaa83..1a9cdca 100644
--- a/java/kudu-subprocess/build.gradle
+++ b/java/kudu-subprocess/build.gradle
@@ -32,6 +32,8 @@ dependencies {
     exclude group: "org.apache.kafka"
   }
   compile libs.slf4jApi
+  compile libs.log4j
+  compile libs.log4jSlf4jImpl
 
   // Workaround for RANGER-2749. Remove once resolved.
   compile "commons-lang:commons-lang:2.6"
diff --git a/src/kudu/ranger/mini_ranger.cc b/src/kudu/ranger/mini_ranger.cc
index a880a97..9d5c284 100644
--- a/src/kudu/ranger/mini_ranger.cc
+++ b/src/kudu/ranger/mini_ranger.cc
@@ -316,19 +316,19 @@ Status MiniRanger::PostToRanger(string url, EasyJson payload) {
   return Status::OK();
 }
 
-Status MiniRanger::CreateClientConfig(const string& client_config_path) {
-  auto policy_cache = JoinPathSegments(client_config_path, "policy-cache");
-  if (!env_->FileExists(client_config_path)) {
-    RETURN_NOT_OK(env_->CreateDir(client_config_path));
+Status MiniRanger::CreateClientConfig(const string& client_config_dir) {
+  auto policy_cache = JoinPathSegments(client_config_dir, "policy-cache");
+  if (!env_->FileExists(client_config_dir)) {
+    RETURN_NOT_OK(env_->CreateDir(client_config_dir));
     RETURN_NOT_OK(env_->CreateDir(policy_cache));
   }
 
   RETURN_NOT_OK(WriteStringToFile(env_, GetRangerCoreSiteXml(kerberos_),
-                                  JoinPathSegments(client_config_path, "core-site.xml")));
+                                  JoinPathSegments(client_config_dir, "core-site.xml")));
   RETURN_NOT_OK(WriteStringToFile(env_, GetRangerKuduSecurityXml(policy_cache, "kudu",
                                                                  ranger_admin_url_,
                                                                  policy_poll_interval_ms_),
-                                  JoinPathSegments(client_config_path,
+                                  JoinPathSegments(client_config_dir,
                                                    "ranger-kudu-security.xml")));
   return Status::OK();
 }
diff --git a/src/kudu/ranger/mini_ranger.h b/src/kudu/ranger/mini_ranger.h
index e6ad364..7449f13 100644
--- a/src/kudu/ranger/mini_ranger.h
+++ b/src/kudu/ranger/mini_ranger.h
@@ -86,8 +86,8 @@ class MiniRanger {
   // Adds a new policy to Ranger.
   Status AddPolicy(AuthorizationPolicy policy) WARN_UNUSED_RESULT;
 
-  // Creates the client configs files in client_config_path.
-  Status CreateClientConfig(const std::string& client_config_path) WARN_UNUSED_RESULT;
+  // Creates the client configs files in the given directory.
+  Status CreateClientConfig(const std::string& client_config_dir) WARN_UNUSED_RESULT;
 
   void EnableKerberos(std::string krb5_config,
                       std::string admin_ktpath,
diff --git a/src/kudu/ranger/ranger_client-test.cc b/src/kudu/ranger/ranger_client-test.cc
index 017423e..73d0128 100644
--- a/src/kudu/ranger/ranger_client-test.cc
+++ b/src/kudu/ranger/ranger_client-test.cc
@@ -22,27 +22,41 @@
 #include <string>
 #include <unordered_set>
 #include <utility>
+#include <vector>
 
 #include <boost/functional/hash/hash.hpp>
+#include <gflags/gflags_declare.h>
 #include <glog/logging.h>
 #include <google/protobuf/any.pb.h>
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/map-util.h"
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/ranger/mini_ranger.h"
 #include "kudu/ranger/ranger.pb.h"
 #include "kudu/subprocess/server.h"
 #include "kudu/subprocess/subprocess.pb.h"
 #include "kudu/util/env.h"
+#include "kudu/util/env_util.h"
 #include "kudu/util/metrics.h"
+#include "kudu/util/path_util.h"
 #include "kudu/util/status.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
+DECLARE_string(log_dir);
+DECLARE_string(ranger_config_path);
+DECLARE_string(ranger_log_config_dir);
+DECLARE_string(ranger_log_level);
+DECLARE_bool(ranger_logtostdout);
+
 namespace kudu {
 namespace ranger {
 
 using boost::hash_combine;
+using kudu::env_util::ListFilesInDir;
 using kudu::subprocess::SubprocessMetrics;
 using kudu::subprocess::SubprocessRequestPB;
 using kudu::subprocess::SubprocessResponsePB;
@@ -50,6 +64,7 @@ using kudu::subprocess::SubprocessServer;
 using std::move;
 using std::string;
 using std::unordered_set;
+using std::vector;
 using strings::Substitute;
 
 struct AuthorizedAction {
@@ -308,5 +323,58 @@ TEST_F(RangerClientTest, TestAuthorizeActionsAllAuthorized) {
   ASSERT_EQ(3, actions.size());
 }
 
+class RangerClientTestBase : public KuduTest {
+ public:
+  RangerClientTestBase()
+      : test_dir_(GetTestDataDirectory()) {}
+
+  void SetUp() override {
+    metric_entity_ = METRIC_ENTITY_server.Instantiate(&metric_registry_, "ranger_client-test");
+    FLAGS_ranger_log_level = "debug";
+    FLAGS_ranger_logtostdout = true;
+    FLAGS_ranger_config_path = test_dir_;
+    FLAGS_ranger_log_config_dir = JoinPathSegments(test_dir_, "log_conf");
+    FLAGS_log_dir = JoinPathSegments(test_dir_, "logs");
+    ASSERT_OK(env_->CreateDir(FLAGS_log_dir));
+    ASSERT_OK(InitializeRanger());
+  }
+
+  Status InitializeRanger() {
+    ranger_.reset(new MiniRanger("127.0.0.1"));
+    RETURN_NOT_OK(ranger_->Start());
+    RETURN_NOT_OK(ranger_->CreateClientConfig(test_dir_));
+    client_.reset(new RangerClient(env_, metric_entity_));
+    return client_->Start();
+  }
+
+ protected:
+  const string test_dir_;
+  MetricRegistry metric_registry_;
+
+  scoped_refptr<MetricEntity> metric_entity_;
+  std::unique_ptr<MiniRanger> ranger_;
+  std::unique_ptr<RangerClient> client_;
+};
+
+TEST_F(RangerClientTestBase, TestLogging) {
+  {
+    // Check that a logging configuration was produced by the Ranger client.
+    vector<string> files;
+    ASSERT_OK(ListFilesInDir(env_, FLAGS_ranger_log_config_dir, &files));
+    SCOPED_TRACE(JoinStrings(files, "\n"));
+    ASSERT_STRINGS_ANY_MATCH(files, ".*log4j2.properties");
+  }
+  // Make a request. It doesn't matter whether it succeeds or not -- debug logs
+  // should include info about each request.
+  Status s = client_->AuthorizeAction("user", ActionPB::ALL, "table");
+  ASSERT_TRUE(s.IsNotAuthorized());
+
+  // Check that the Ranger client logs some things.
+  vector<string> files;
+  ASSERT_OK(ListFilesInDir(env_, FLAGS_log_dir, &files));
+  SCOPED_TRACE(JoinStrings(files, "\n"));
+  ASSERT_STRINGS_ANY_MATCH(files, "kudu-ranger-subprocess.*log");
+}
+
 } // namespace ranger
 } // namespace kudu
diff --git a/src/kudu/ranger/ranger_client.cc b/src/kudu/ranger/ranger_client.cc
index be40dd1..07baa40 100644
--- a/src/kudu/ranger/ranger_client.cc
+++ b/src/kudu/ranger/ranger_client.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/ranger/ranger_client.h"
 
+#include <algorithm>
 #include <cstdint>
 #include <cstdlib>
 #include <ostream>
@@ -39,9 +40,11 @@
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/flag_validators.h"
 #include "kudu/util/metrics.h"
+#include "kudu/util/net/net_util.h"
 #include "kudu/util/path_util.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/status.h"
+#include "kudu/util/string_case.h"
 #include "kudu/util/subprocess.h"
 
 DEFINE_string(ranger_java_path, "",
@@ -72,6 +75,29 @@ DEFINE_string(ranger_receiver_fifo_dir, "",
               "--ranger_config_path directory.");
 TAG_FLAG(ranger_receiver_fifo_dir, advanced);
 
+DEFINE_string(ranger_log_config_dir, "",
+              "Directory in which to look for a kudu-ranger-subprocess-log4j2.properties "
+              "file. If empty, will use the value of --log_dir. If such a file does not "
+              "exist, a properties file will be created to honor Kudu's logging "
+              "configurations.");
+TAG_FLAG(ranger_log_config_dir, advanced);
+TAG_FLAG(ranger_log_config_dir, evolving);
+
+DEFINE_string(ranger_log_level, "info",
+              "Log level to use in the Ranger Java subprocess. Supports \"all\", \"trace\", "
+              "\"debug\", \"info\", \"warn\", \"error\", \"fatal\", and \"off\"");
+TAG_FLAG(ranger_log_level, advanced);
+TAG_FLAG(ranger_log_level, evolving);
+
+DEFINE_bool(ranger_logtostdout, false,
+            "Whether to have the Ranger subprocess log to stdout.");
+TAG_FLAG(ranger_logtostdout, advanced);
+TAG_FLAG(ranger_logtostdout, evolving);
+
+DECLARE_int32(max_log_files);
+DECLARE_int32(max_log_size);
+DECLARE_string(log_dir);
+
 METRIC_DEFINE_histogram(server, ranger_subprocess_execution_time_ms,
     "Ranger subprocess execution time (ms)",
     kudu::MetricUnit::kMilliseconds,
@@ -138,20 +164,34 @@ namespace ranger {
 
 using kudu::security::GetKrb5ConfigFile;
 using kudu::subprocess::SubprocessMetrics;
+using kudu::subprocess::SubprocessServer;
 using std::move;
 using std::string;
 using std::unordered_set;
 using std::vector;
 using strings::Substitute;
 
-static const char* kUnauthorizedAction = "Unauthorized action";
-static const char* kDenyNonRangerTableTemplate = "Denying action on table with invalid name $0. "
-                                                 "Use 'kudu table rename_table' to rename it to "
-                                                 "a Ranger-compatible name.";
-static const char* kMainClass = "org.apache.kudu.subprocess.ranger.RangerSubprocessMain";
+namespace {
+
+const char* kUnauthorizedAction = "Unauthorized action";
+const char* kDenyNonRangerTableTemplate = "Denying action on table with invalid name $0. "
+                                          "Use 'kudu table rename_table' to rename it to "
+                                          "a Ranger-compatible name.";
+const char* kMainClass = "org.apache.kudu.subprocess.ranger.RangerSubprocessMain";
+const char* kRangerClientLogFilename = "kudu-ranger-subprocess";
+const char* kRangerClientPropertiesFilename = "kudu-ranger-subprocess-log4j2.properties";
+
+const char* ScopeToString(RangerClient::Scope scope) {
+  switch (scope) {
+    case RangerClient::Scope::DATABASE: return "database";
+    case RangerClient::Scope::TABLE: return "table";
+  }
+  LOG(FATAL) << static_cast<uint16_t>(scope) << ": unknown scope";
+  __builtin_unreachable();
+}
 
 // Returns the path to the JAR file containing the Ranger subprocess.
-static string GetRangerJarPath() {
+string RangerJarPath() {
   if (FLAGS_ranger_jar_path.empty()) {
     string exe;
     CHECK_OK(Env::Default()->GetExecutablePath(&exe));
@@ -162,18 +202,19 @@ static string GetRangerJarPath() {
 }
 
 // Returns the classpath to be used for the Ranger subprocess.
-static string GetJavaClasspath() {
-  return Substitute("$0:$1", GetRangerJarPath(), FLAGS_ranger_config_path);
+string JavaClasspath() {
+  DCHECK(!FLAGS_ranger_config_path.empty());
+  return Substitute("$0:$1", RangerJarPath(), FLAGS_ranger_config_path);
 }
 
-static string ranger_fifo_base() {
+string RangerFifoBase() {
   DCHECK(!FLAGS_ranger_config_path.empty());
   const string& fifo_dir = FLAGS_ranger_receiver_fifo_dir.empty() ?
       FLAGS_ranger_config_path : FLAGS_ranger_receiver_fifo_dir;
   return JoinPathSegments(fifo_dir, "ranger_receiever_fifo");
 }
 
-static string java_path() {
+string JavaPath() {
   if (FLAGS_ranger_java_path.empty()) {
     auto java_home = getenv("JAVA_HOME");
     if (!java_home) {
@@ -184,20 +225,103 @@ static string java_path() {
   return FLAGS_ranger_java_path;
 }
 
+bool ValidateRangerConfiguration() {
+  if (!FLAGS_ranger_config_path.empty()) {
+    // First, check the specified Java path.
+    const string java_path = JavaPath();
+    if (!Env::Default()->FileExists(java_path)) {
+      // Otherwise, since the specified path is not absolute, check if
+      // the Java binary is on the PATH.
+      string p;
+      Status s = Subprocess::Call({ "which", java_path }, "", &p);
+      if (!s.ok()) {
+        LOG(ERROR) << Substitute("--ranger_java_path has invalid java binary path: $0",
+                                 java_path);
+        return false;
+      }
+    }
+    const string ranger_jar_path = RangerJarPath();
+    if (!Env::Default()->FileExists(ranger_jar_path)) {
+      LOG(ERROR) << Substitute("--ranger_jar_path has invalid JAR file path: $0",
+                               ranger_jar_path);
+      return false;
+    }
+  }
+  return true;
+}
+GROUP_FLAG_VALIDATOR(ranger_config_flags, ValidateRangerConfiguration);
+
+bool ValidateLog4jLevel(const char* /*flagname*/, const string& value) {
+  static const vector<string> kLevels = {
+    "all",
+    "trace",
+    "debug",
+    "info",
+    "warn",
+    "error",
+    "fatal",
+    "off",
+  };
+  string vlower = value;
+  ToLowerCase(&vlower);
+  if (std::any_of(kLevels.begin(), kLevels.end(),
+      [&vlower] (const string& level) { return level == vlower; })) {
+    return true;
+  }
+  LOG(ERROR) << Substitute("expected one of {$0} but got $1",
+                           JoinStrings(kLevels, ", "), value);
+  return false;
+}
+DEFINE_validator(ranger_log_level, &ValidateLog4jLevel);
+
+Status GetOrCreateLog4j2PropertiesFile(Env* env, string* logging_properties_file) {
+  const string log_conf_dir = FLAGS_ranger_log_config_dir.empty() ?
+      FLAGS_log_dir : FLAGS_ranger_log_config_dir;
+  // It's generally expected that --log_dir has already been created elsewhere.
+  if (!FLAGS_ranger_log_config_dir.empty() && !env->FileExists(log_conf_dir)) {
+    RETURN_NOT_OK(env->CreateDir(log_conf_dir));
+  }
+  const string log4j2_properties_file = JoinPathSegments(log_conf_dir,
+                                                         kRangerClientPropertiesFilename);
+  string new_or_existing;
+  if (env->FileExists(log4j2_properties_file)) {
+    new_or_existing = "existing";
+  } else {
+    string exe;
+    RETURN_NOT_OK(env->GetExecutablePath(&exe));
+    const string program_name = BaseName(exe);
+    string hostname;
+    RETURN_NOT_OK(GetHostname(&hostname));
+    const string log_filename = Substitute("$0.$1", kRangerClientLogFilename, hostname);
+    RETURN_NOT_OK(WriteStringToFileSync(
+        env, subprocess::Log4j2Properties(program_name, FLAGS_log_dir, log_filename,
+                                          FLAGS_max_log_size, FLAGS_max_log_files,
+                                          FLAGS_ranger_log_level,
+                                          FLAGS_ranger_logtostdout),
+        log4j2_properties_file));
+    new_or_existing = "new";
+  }
+  LOG(INFO) << Substitute("Using $0 properties file: $1",
+                          new_or_existing, log4j2_properties_file);
+  *logging_properties_file = log4j2_properties_file;
+  return Status::OK();
+}
+
 // Builds the arguments to start the Ranger subprocess with the given receiver
 // fifo path. Specifically pass the principal and keytab file that the Ranger
 // subprocess will log in with if Kerberos is enabled. 'args' has the final
 // arguments.  Returns 'OK' if arguments successfully created, error otherwise.
-static Status BuildArgv(const string& fifo_path, vector<string>* argv) {
+Status BuildArgv(const string& fifo_path, const string& log_properties_file,
+                 vector<string>* argv) {
   DCHECK(argv);
   DCHECK(!FLAGS_ranger_config_path.empty());
   // Pass the required arguments to run the Ranger subprocess.
-  vector<string> ret = { java_path() };
-
-  ret.emplace_back(Substitute("-Djava.security.krb5.conf=$0", GetKrb5ConfigFile()));
-  ret.emplace_back("-cp");
-  ret.emplace_back(GetJavaClasspath());
-  ret.emplace_back(kMainClass);
+  vector<string> ret = {
+    JavaPath(),
+    Substitute("-Djava.security.krb5.conf=$0", GetKrb5ConfigFile()),
+    Substitute("-Dlog4j2.configurationFile=$0", log_properties_file),
+    "-cp", JavaClasspath(), kMainClass,
+  };
   // When Kerberos is enabled in Kudu, pass both Kudu principal and keytab file
   // to the Ranger subprocess.
   if (!FLAGS_keytab_file.empty()) {
@@ -215,39 +339,7 @@ static Status BuildArgv(const string& fifo_path, vector<string>* argv) {
   return Status::OK();
 }
 
-static bool ValidateRangerConfiguration() {
-  if (!FLAGS_ranger_config_path.empty()) {
-    // First, check the specified path.
-    if (!Env::Default()->FileExists(java_path())) {
-      // Otherwise, since the specified path is not absolute, check if
-      // the Java binary is on the PATH.
-      string p;
-      Status s = Subprocess::Call({ "which", java_path() }, "", &p);
-      if (!s.ok()) {
-        LOG(ERROR) << Substitute("--ranger_java_path has invalid java binary path: $0",
-                                 java_path());
-        return false;
-      }
-    }
-    string ranger_jar_path = GetRangerJarPath();
-    if (!Env::Default()->FileExists(ranger_jar_path)) {
-      LOG(ERROR) << Substitute("--ranger_jar_path has invalid JAR file path: $0",
-                               ranger_jar_path);
-      return false;
-    }
-  }
-  return true;
-}
-GROUP_FLAG_VALIDATOR(ranger_config_flags, ValidateRangerConfiguration);
-
-static const char* ScopeToString(RangerClient::Scope scope) {
-  switch (scope) {
-    case RangerClient::Scope::DATABASE: return "database";
-    case RangerClient::Scope::TABLE: return "table";
-  }
-  LOG(FATAL) << static_cast<uint16_t>(scope) << ": unknown scope";
-  __builtin_unreachable();
-}
+} // anonymous namespace
 
 #define HISTINIT(member, x) member = METRIC_##x.Instantiate(entity)
 RangerSubprocessMetrics::RangerSubprocessMetrics(const scoped_refptr<MetricEntity>& entity) {
@@ -270,9 +362,11 @@ RangerClient::RangerClient(Env* env, const scoped_refptr<MetricEntity>& metric_e
 
 Status RangerClient::Start() {
   VLOG(1) << "Initializing Ranger subprocess server";
+  string log_properties_file;
+  RETURN_NOT_OK(GetOrCreateLog4j2PropertiesFile(env_, &log_properties_file));
+  const string fifo_path = SubprocessServer::FifoPath(RangerFifoBase());
   vector<string> argv;
-  const string fifo_path = subprocess::SubprocessServer::FifoPath(ranger_fifo_base());
-  RETURN_NOT_OK(BuildArgv(fifo_path, &argv));
+  RETURN_NOT_OK(BuildArgv(fifo_path, log_properties_file, &argv));
   subprocess_.reset(new RangerSubprocess(env_, fifo_path, std::move(argv), metric_entity_));
   return subprocess_->Start();
 }
diff --git a/src/kudu/subprocess/CMakeLists.txt b/src/kudu/subprocess/CMakeLists.txt
index a2612db..65cb40c 100644
--- a/src/kudu/subprocess/CMakeLists.txt
+++ b/src/kudu/subprocess/CMakeLists.txt
@@ -49,6 +49,7 @@ add_custom_target(subprocess_jar DEPENDS ${SUBPROCESS_JAR})
 add_library(kudu_subprocess
   server.cc
   subprocess_protocol.cc
+  subprocess_proxy.cc
 )
 target_link_libraries(kudu_subprocess
   gutil
diff --git a/src/kudu/subprocess/subprocess_proxy.cc b/src/kudu/subprocess/subprocess_proxy.cc
new file mode 100644
index 0000000..ee8d3a9
--- /dev/null
+++ b/src/kudu/subprocess/subprocess_proxy.cc
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/subprocess/subprocess_proxy.h"
+
+#include <string>
+#include <vector>
+
+#include "kudu/gutil/strings/join.h"
+#include "kudu/gutil/strings/substitute.h"
+
+using std::string;
+using std::vector;
+using strings::Substitute;
+
+namespace kudu {
+namespace subprocess {
+
+namespace {
+// Helpers to generate log4j2 properties suitable for a Java-based subprocess.
+// TODO(awong): if our logging requirements get more complex, we should
+// consider a builder for these.
+
+// $0: appender instance
+// $1: appender name
+// $2: log directory
+// $3: log filename base - new logs will be called "{log directory}/{log
+//     filename base}.log" and will roll to include the date in the name
+// $4: rollover policy size in MB
+// $5: number of files max
+const char* kLog4j2RollingPropertiesTemplate = R"(
+appender.$0.type = RollingFile
+appender.$0.name = $1
+appender.$0.layout.type = PatternLayout
+appender.$0.layout.pattern = %d{yyyy-MM-dd HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n
+appender.$0.filename = $2/$3.log
+appender.$0.filePattern = $2/$3.%d{yyyyMMdd-HHmmss}.log.gz
+appender.$0.policies.type = Policies
+appender.$0.policies.size.type = SizeBasedTriggeringPolicy
+appender.$0.policies.size.size = $4 MB
+appender.$0.strategy.type = DefaultRolloverStrategy
+appender.$0.strategy.max = $5
+)";
+
+// $0: appender instance
+// $1: appender name
+const char* kLog4j2ConsoleProperties = R"(
+appender.$0.type = Console
+appender.$0.name = $1
+appender.$0.layout.type = PatternLayout
+appender.$0.layout.pattern = %d{yyyy-MM-dd HH:mm:ss.SSS} [%p - %t] (%F:%L) %m%n
+)";
+
+// $0: name of the creator of these properties (e.g. the program name)
+// $1: comma-separated list of appender instances
+// $2: newline-separated list of appender configs
+// $3: log level (supports "all", "debug", "info", "warn", "error", "fatal")
+// $4: comma-separated list of appender refs
+// $5: newline-separated list of appender ref name specifications
+const char* kLog4j2PropertiesTemplate = R"(
+# THIS FILE WAS GENERATED BY $0.
+
+status = error
+name = PropertiesConfig
+appenders = $1
+$2
+rootLogger.level = $3
+rootLogger.appenderRefs = $4
+$5
+)";
+} // anonymous namespace
+
+string Log4j2Properties(const string& creator, const string& log_dir,
+                        const string& log_filename, int rollover_size_mb,
+                        int max_files, const string& log_level,
+                        bool log_to_stdout) {
+  static const char* kRollingInstance = "rollingFile";
+  static const char* kRollingName = "RollingFileAppender";
+  static const char* kRollingRef = "rolling";
+  vector<string> appender_instances = { kRollingInstance };
+  vector<string> appender_names = { kRollingName };
+  vector<string> appender_configs = {
+    Substitute(kLog4j2RollingPropertiesTemplate, kRollingInstance, kRollingName,
+               log_dir, log_filename, rollover_size_mb, max_files)
+  };
+  vector<string> appender_refs = { kRollingRef };
+  if (log_to_stdout) {
+    static const char* kConsoleInstance = "console";
+    static const char* kConsoleName = "SystemOutAppender";
+    static const char* kConsoleRef = "stdout";
+    appender_instances.emplace_back(kConsoleInstance);
+    appender_names.emplace_back(kConsoleName);
+    appender_refs.emplace_back(kConsoleRef);
+    appender_configs.emplace_back(
+        strings::Substitute(kLog4j2ConsoleProperties, kConsoleInstance, kConsoleName));
+  }
+
+  DCHECK_EQ(appender_refs.size(), appender_names.size());
+  vector<string> appender_ref_name_specs(appender_refs.size());
+  for (int i = 0; i < appender_refs.size(); i++) {
+    appender_ref_name_specs.emplace_back(
+        strings::Substitute("rootLogger.appenderRef.$0.ref = $1",
+                            appender_refs[i], appender_names[i]));
+  }
+  return Substitute(kLog4j2PropertiesTemplate, creator,
+                    JoinStrings(appender_instances, ", "),
+                    JoinStrings(appender_configs, "\n"),
+                    log_level,
+                    JoinStrings(appender_refs, ", "),
+                    JoinStrings(appender_ref_name_specs, "\n"));
+}
+
+}  // namespace subprocess
+}  // namespace kudu
diff --git a/src/kudu/subprocess/subprocess_proxy.h b/src/kudu/subprocess/subprocess_proxy.h
index 76289bd..d5a5d1b 100644
--- a/src/kudu/subprocess/subprocess_proxy.h
+++ b/src/kudu/subprocess/subprocess_proxy.h
@@ -16,24 +16,37 @@
 // under the License.
 #pragma once
 
-#include <vector>
+#include <memory>
+#include <ostream>
 #include <string>
+#include <utility>
+#include <vector>
 
 #include <glog/logging.h>
+#include <google/protobuf/any.pb.h>
 
 #include "kudu/common/wire_protocol.h"
-#include "kudu/subprocess/server.h"
-#include "kudu/subprocess/subprocess.pb.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
-#include "kudu/util/metrics.h"
+#include "kudu/subprocess/server.h"
+#include "kudu/subprocess/subprocess.pb.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/status.h"
-#include "kudu/util/thread.h"
 
 namespace kudu {
+
+class Env;
+class MetricEntity;
+
 namespace subprocess {
 
+// Return a string that can serve as the contents of a log4j2 properties file,
+// with the given logging parameters.
+std::string Log4j2Properties(const std::string& creator, const std::string& log_dir,
+                             const std::string& log_filename, int rollover_size_mb,
+                             int max_files, const std::string& log_level,
+                             bool log_to_stdout);
+
 // Template that wraps a SubprocessServer, exposing only the underlying ReqPB
 // and RespPB as an interface. The given MetricsPB will be initialized,
 // allowing for metrics specific to each specialized SubprocessServer.
diff --git a/src/kudu/util/env.h b/src/kudu/util/env.h
index 7c57bb3..611de80 100644
--- a/src/kudu/util/env.h
+++ b/src/kudu/util/env.h
@@ -691,6 +691,9 @@ class FileLock {
 // A utility routine: write "data" to the named file.
 extern Status WriteStringToFile(Env* env, const Slice& data,
                                 const std::string& fname);
+// Like above but also fsyncs the new file.
+extern Status WriteStringToFileSync(Env* env, const Slice& data,
+                                    const std::string& fname);
 
 // A utility routine: read contents of named file into *data
 extern Status ReadFileToString(Env* env, const std::string& fname,


[kudu] 02/02: scripts: fix benchmark parsing for wire_protocol-test

Posted by aw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit e37b3401d008f98a159187b46cc2f1b650152db1
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Fri Apr 3 21:11:04 2020 -0700

    scripts: fix benchmark parsing for wire_protocol-test
    
    This fixes the existing parsing for row-wise conversion, and adds
    parsing for the new columnar conversion, writing the columnar stats as a
    new workload "WireProtocolBenchmark_columnar" and plotting results as
    "wire-protocol-test_columnar".
    
    Change-Id: I9d1ac45cf730674a4aa3b7d1f7f60b6919e3eeb1
    Reviewed-on: http://gerrit.cloudera.org:8080/15651
    Tested-by: Kudu Jenkins
    Reviewed-by: Grant Henke <gr...@apache.org>
---
 src/kudu/scripts/benchmarks.sh | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/kudu/scripts/benchmarks.sh b/src/kudu/scripts/benchmarks.sh
index 3b3f08d..19f1393 100755
--- a/src/kudu/scripts/benchmarks.sh
+++ b/src/kudu/scripts/benchmarks.sh
@@ -218,10 +218,9 @@ run_benchmarks() {
 
   # run wire_protocol-test 5 times.
   #
-  # We run the first shard, which uses a column count of 3 and a select rate of
-  # 1 (as this is most similar to the unsharded microbenchmark we ran for years).
+  # We run the non-null 10-column benchmark, selecting all rows.
   for i in $(seq 1 $NUM_SAMPLES); do
-    KUDU_ALLOW_SLOW_TESTS=true ./build/latest/bin/wire_protocol-test --gtest_filter=*Benchmark/0 \
+    KUDU_ALLOW_SLOW_TESTS=true ./build/latest/bin/wire_protocol-test --gtest_filter=*Benchmark/10_int64_non_null_sel_100pct \
       &> $LOGDIR/$WIRE_PROTOCOL_TEST$i.log
   done
 
@@ -361,10 +360,16 @@ parse_and_record_all_results() {
     record_result $BUILD_IDENTIFIER $MT_BLOOM_TEST $i $real
   done
 
-  # Parse out rate of cycles/sec from: "Converting to PB with column count 3 and row select rate 1: 32.987841 cycles/cell"
+  # Parse out row-wise rate of cycles/cell from:
+  #   "Converting 10_int64_non_null to PB (method row-wise) row select rate 1: 30.196263 cycles/cell"
+  # Parse out the columnar rate of cycles/cell from:
+  #   "Converting 10_int64_non_null to PB (method columnar) row select rate 1: 1.313369 cycles/cell"
   for i in $(seq 1 $NUM_SAMPLES); do
-    real=`grep "Converting to PB with column count" $LOGDIR/$WIRE_PROTOCOL_TEST$i.log | sed 's|^.*: \([[:digit:].]*\) cycles/cell$|\1|'`
-    record_result $BUILD_IDENTIFIER $WIRE_PROTOCOL_TEST $i $real
+    real_rowwise=`grep "Converting.*to PB (method row-wise)" $LOGDIR/$WIRE_PROTOCOL_TEST$i.log | sed 's|^.*: \([[:digit:].]*\) cycles/cell$|\1|'`
+    record_result $BUILD_IDENTIFIER $WIRE_PROTOCOL_TEST $i $real_rowwise
+
+    real_colwise=`grep "Converting.*to PB (method columnar)" $LOGDIR/$WIRE_PROTOCOL_TEST$i.log | sed 's|^.*: \([[:digit:].]*\) cycles/cell$|\1|'`
+    record_result $BUILD_IDENTIFIER ${WIRE_PROTOCOL_TEST}_columnar $i $real_colwise
   done
 
   # parse the rate out of: "I1009 15:00:30.023576 27043 rpc-bench.cc:108] Reqs/sec:         84404.4"
@@ -513,12 +518,13 @@ load_stats_and_generate_plots() {
   load_and_generate_plot "${MEMROWSET_BENCH}${INSERT}" memrowset-bench-insert
   load_and_generate_plot "${MEMROWSET_BENCH}Scan%" memrowset-bench-scan
 
-  load_and_generate_plot $BLOOM_TEST bloom-test
-  load_and_generate_plot $MT_BLOOM_TEST mt-bloom-test
+  load_and_generate_plot "$BLOOM_TEST" bloom-test
+  load_and_generate_plot "$MT_BLOOM_TEST" mt-bloom-test
 
-  load_and_generate_plot $WIRE_PROTOCOL_TEST wire-protocol-test
+  load_and_generate_plot "${WIRE_PROTOCOL_TEST}" wire-protocol-test
+  load_and_generate_plot "${WIRE_PROTOCOL_TEST}_columnar" wire-protocol-test-columnar
 
-  load_and_generate_plot $RPC_BENCH_TEST rpc-bench-test
+  load_and_generate_plot "$RPC_BENCH_TEST" rpc-bench-test
 
   load_and_generate_plot "${TS_INSERT_LATENCY}%" ts-insert-latency