You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2020/04/06 18:19:04 UTC

[kudu] branch master updated (b08fb26 -> f89a0ce)

This is an automated email from the ASF dual-hosted git repository.

todd pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from b08fb26  ranger: allow overwriting of the log4j2 properties file
     new e524334  rowblock: use BMI instruction set when available for GetSelectedRows
     new f89a0ce  ranger: remove experimental flags

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/common/rowblock.cc      | 22 +++++++++++++++++++---
 src/kudu/ranger/ranger_client.cc |  3 ---
 2 files changed, 19 insertions(+), 6 deletions(-)


[kudu] 01/02: rowblock: use BMI instruction set when available for GetSelectedRows

Posted by to...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

todd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit e524334ff04253f38eda7ab3bc589ce76c0934ae
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Wed Apr 1 15:11:58 2020 -0700

    rowblock: use BMI instruction set when available for GetSelectedRows
    
    This enables a BMI variant of SelectionVector::GetSelectedRows which has
    a higher throughput. I disassembled the resulting hot loop as follows:
    
    BMI:
      L:
        tzcnt  %rsi,%rbx
        or     %r11d,%ebx
        mov    %bx,(%rdx)
        blsr   %rsi,%rsi
        tzcnt  %rsi,%rbx
        or     %r11d,%ebx
        mov    %bx,0x2(%rdx)
        blsr   %rsi,%rsi
        tzcnt  %rsi,%rbx
        or     %r11d,%ebx
        mov    %bx,0x4(%rdx)
        add    $0x6,%rdx
        blsr   %rsi,%rsi
        add    $0xfffffffd,%ecx
        jne    L
    
    non-BMI:
      L:
        bsf    %rsi,%rax
        or     %r12d,%eax
        mov    %ax,(%rdx)
        lea    -0x1(%rsi),%rax
        and    %rsi,%rax
        bsf    %rax,%rsi
        or     %r12d,%esi
        mov    %si,0x2(%rdx)
        lea    -0x1(%rax),%rbx
        and    %rax,%rbx
        bsf    %rbx,%rax
        or     %r12d,%eax
        mov    %ax,0x4(%rdx)
        add    $0x6,%rdx
        lea    -0x1(%rbx),%rsi
        and    %rbx,%rsi
        add    $0xfffffffd,%ecx
        jne    L
    
    ... and then used llvm-mca on these assembly files across a few common
    architectures to see how many cycles were required for 100 iterations of
    the loop. Results are as follows:
    
    haswell non-bmi.s: Total Cycles:      606
    haswell bmi.s: Total Cycles:          382
    
    broadwell non-bmi.s: Total Cycles     606
    broadwell bmi.s: Total Cycles:        382
    
    skylake non-bmi.s: Total Cycles:      606
    skylake bmi.s: Total Cycles:          307
    
    So, on the most recent chips, this should be about a 2x improvement in
    this function. This function made up a few percent of overall CPU
    consumption in some TSBS workloads, so this patch had some small but
    measurable improvement on end-to-end throughput.
    
    Change-Id: I8ec74bc5db07c18d0e36de14a2343f49fc5c2859
    Reviewed-on: http://gerrit.cloudera.org:8080/15635
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/common/rowblock.cc | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 33fb220..85bd4ca 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -23,6 +23,7 @@
 #include <glog/logging.h>
 
 #include "kudu/gutil/bits.h"
+#include "kudu/gutil/cpu.h"
 #include "kudu/gutil/port.h"
 #include "kudu/util/bitmap.h"
 
@@ -76,8 +77,7 @@ void SelectionVector::ClearToSelectAtMost(size_t max_rows) {
 }
 
 
-// TODO(todd) this is a bit faster when implemented with target "bmi" enabled.
-// Consider duplicating it and doing runtime switching.
+template<bool BMI>
 static void GetSelectedRowsInternal(const uint8_t* __restrict__ bitmap,
                                     int n_bytes,
                                     uint16_t* __restrict__ dst) {
@@ -87,6 +87,18 @@ static void GetSelectedRowsInternal(const uint8_t* __restrict__ bitmap,
                 });
 }
 
+static const bool kHasBmi = base::CPU().has_bmi();
+
+#ifdef __x86_64__
+// Explicit instantiation with the BMI instruction set enabled, which
+// makes this slightly faster.
+template
+__attribute__((target("bmi")))
+void GetSelectedRowsInternal<true>(const uint8_t* __restrict__ bitmap,
+                                   int n_bytes,
+                                   uint16_t* __restrict__ dst);
+#endif
+
 SelectedRows SelectionVector::GetSelectedRows() const {
   CHECK_LE(n_rows_, std::numeric_limits<uint16_t>::max());
   int n_selected = CountSelected();
@@ -97,7 +109,11 @@ SelectedRows SelectionVector::GetSelectedRows() const {
   vector<uint16_t> selected;
   if (n_selected > 0) {
     selected.resize(n_selected);
-    GetSelectedRowsInternal(&bitmap_[0], n_bytes_, selected.data());
+    if (kHasBmi) {
+      GetSelectedRowsInternal<true>(&bitmap_[0], n_bytes_, selected.data());
+    } else {
+      GetSelectedRowsInternal<false>(&bitmap_[0], n_bytes_, selected.data());
+    }
   }
   return SelectedRows(this, std::move(selected));
 }


[kudu] 02/02: ranger: remove experimental flags

Posted by to...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

todd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit f89a0ced376019a27bcdee58fb2874bf4854406b
Author: Hao Hao <ha...@cloudera.com>
AuthorDate: Mon Apr 6 00:05:21 2020 -0700

    ranger: remove experimental flags
    
    This patch removes the experimental flags in Ranger, given I did
    integration tests on a real cluster.
    
    Change-Id: I87e36f973730f54e6e3984e34a8216eeeacc1088
    Reviewed-on: http://gerrit.cloudera.org:8080/15658
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/ranger/ranger_client.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/kudu/ranger/ranger_client.cc b/src/kudu/ranger/ranger_client.cc
index ff78112..c661f69 100644
--- a/src/kudu/ranger/ranger_client.cc
+++ b/src/kudu/ranger/ranger_client.cc
@@ -55,20 +55,17 @@ DEFINE_string(ranger_java_path, "",
               "evaluated using the Kudu user's PATH. Empty string means "
               "$JAVA_HOME/bin/java is used. If $JAVA_HOME is not found, Kudu "
               "will attempt to find 'java' in $PATH.");
-TAG_FLAG(ranger_java_path, experimental);
 
 DEFINE_string(ranger_config_path, "",
               "Path to directory containing Ranger client configuration. "
               "Enables Ranger authorization provider. "
               "sentry_service_rpc_addresses must not be set if this is "
               "enabled.");
-TAG_FLAG(ranger_config_path, experimental);
 
 DEFINE_string(ranger_jar_path, "",
               "Path to the JAR file containing the Ranger subprocess. "
               "If not set, the default JAR file path is expected to be"
               "next to the master binary.");
-TAG_FLAG(ranger_jar_path, experimental);
 
 DEFINE_string(ranger_receiver_fifo_dir, "",
               "Directory in which to create a fifo used to receive messages "