You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@kudu.apache.org by to...@apache.org on 2017/06/08 21:12:37 UTC

[1/4] kudu git commit: Java: fix git worktree compatibility

Repository: kudu
Updated Branches:
  refs/heads/master 0c1b52fdf -> 2d30f3a96


Java: fix git worktree compatibility

The .git file is not a directory in git worktree checkouts.

A similar issue was fixed in thirdparty in 21f5ae0d53f9e.

Change-Id: I7297b3a69ccfc0a9ce9db083a478bd7e92e64dcf
Reviewed-on: http://gerrit.cloudera.org:8080/7104
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3296c076
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3296c076
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3296c076

Branch: refs/heads/master
Commit: 3296c076d19538f27e012e724ad5b26fec147b9b
Parents: 0c1b52f
Author: Dan Burkert <da...@apache.org>
Authored: Wed Jun 7 10:31:13 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Jun 7 19:07:28 2017 +0000

----------------------------------------------------------------------
 .../src/test/java/org/apache/kudu/client/TestUtils.java            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/3296c076/java/kudu-client/src/test/java/org/apache/kudu/client/TestUtils.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestUtils.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestUtils.java
index e9a74a4..e3e0622 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestUtils.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestUtils.java
@@ -108,7 +108,7 @@ public class TestUtils {
    */
   private static File findParentGitDir(File dir) {
     while (dir != null) {
-      if (new File(dir, ".git").isDirectory()) {
+      if (new File(dir, ".git").exists()) {
         return dir;
       }
       dir = dir.getParentFile();

[3/4] kudu git commit: [catalog_manager_tsk-itest] remove KUDU-1294 workaround

Posted by to...@apache.org.

[catalog_manager_tsk-itest] remove KUDU-1294 workaround

KUDU-1294 is fixed now, so it's possible to remove the workaround
for the memory accounting bug. Before KUDU-1294 was fixed, the
workaround was necessary because the test crashed sometimes due
DCHECK() firing in the destructor of the MemTracker class.

Change-Id: Ia371c222472e2deb43cf6b1471bf5ccb9394847d
Reviewed-on: http://gerrit.cloudera.org:8080/7101
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <da...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/cf686240
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/cf686240
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/cf686240

Branch: refs/heads/master
Commit: cf6862406a9bb4cddf0c96114b13bcc7890abff6
Parents: 3ed4a00
Author: Alexey Serbin <as...@cloudera.com>
Authored: Tue Jun 6 22:12:22 2017 -0700
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Thu Jun 8 19:26:04 2017 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/catalog_manager_tsk-itest.cc | 5 -----
 1 file changed, 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/cf686240/src/kudu/integration-tests/catalog_manager_tsk-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/catalog_manager_tsk-itest.cc b/src/kudu/integration-tests/catalog_manager_tsk-itest.cc
index 656d15a..2b201c2 100644
--- a/src/kudu/integration-tests/catalog_manager_tsk-itest.cc
+++ b/src/kudu/integration-tests/catalog_manager_tsk-itest.cc
@@ -65,11 +65,6 @@ class CatalogManagerTskITest : public KuduTest {
     // Add common flags for both masters and tservers.
     const vector<string> common_flags = {
       Substitute("--raft_heartbeat_interval_ms=$0", hb_interval_ms_),
-      // Added a workaround for memory accounting bug, otherwise the assertion
-      // from MemTracker destructor fires in rare cases. Since the memory
-      // accounting code is slated for revamping, let's disable the memory
-      // tracking for this test.
-      "--tablet_transaction_memory_limit_mb=-1",
     };
     copy(common_flags.begin(), common_flags.end(),
         back_inserter(cluster_opts_.extra_master_flags));

[2/4] kudu git commit: Release notes for Kudu 1.4

Posted by to...@apache.org.

Release notes for Kudu 1.4

Change-Id: I28a8dda999ab75607e07f760a4d15683dd39c85b
Reviewed-on: http://gerrit.cloudera.org:8080/7108
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Jean-Daniel Cryans <jd...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3ed4a007
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3ed4a007
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3ed4a007

Branch: refs/heads/master
Commit: 3ed4a007c4b5994afbbd6bbba86d8ca128b97748
Parents: 3296c07
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Jun 7 15:33:20 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Jun 8 03:00:55 2017 +0000

----------------------------------------------------------------------
 docs/known_issues.adoc  |  10 ++-
 docs/release_notes.adoc | 191 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 189 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/3ed4a007/docs/known_issues.adoc
----------------------------------------------------------------------
diff --git a/docs/known_issues.adoc b/docs/known_issues.adoc
index 6fa8632..1d17763 100644
--- a/docs/known_issues.adoc
+++ b/docs/known_issues.adoc
@@ -51,9 +51,9 @@
 
 === Columns
 
-* TIMESTAMP, DECIMAL, CHAR, VARCHAR, DATE, and complex types like ARRAY are not supported.
+* DECIMAL, CHAR, VARCHAR, DATE, and complex types like ARRAY are not supported.
 
-* Type, nullability, compression, and encoding of existing columns cannot be changed by altering the table.
+* Type and nullability of existing columns cannot be changed by altering the table.
 
 * Tables can have a maximum of 300 columns.
 
@@ -184,3 +184,9 @@ to communicate only the most important known issues.
   to start up. It is recommended to limit the number of tablets per server to 100 or fewer.
   Consider this limitation when pre-splitting your tables. If you notice slow start-up times,
   you can monitor the number of tablets per server in the web UI.
+
+* Kerberos authentication does not function correctly on hosts which contain
+  capital letters in their hostname.
+
+* Kerberos authentication does not function correctly if `rdns = false` is configured
+  in `krb5.conf`.

http://git-wip-us.apache.org/repos/asf/kudu/blob/3ed4a007/docs/release_notes.adoc
----------------------------------------------------------------------
diff --git a/docs/release_notes.adoc b/docs/release_notes.adoc
index ee19f36..f8dddfc 100644
--- a/docs/release_notes.adoc
+++ b/docs/release_notes.adoc
@@ -33,9 +33,165 @@
 [[rn_1.4.0_new_features]]
 == New features
 
+* The C++ and Java client libraries now support the ability to alter the
+  storage attributes (e.g. encoding and compression) and default value
+  of existing columns. Additionally, it is now possible to rename
+  a column which is part of a table's primary key.
+
+* The C++ client library now includes an experimental `KuduPartitioner` API which may
+  be used to efficiently map rows to their associated partitions and hosts.
+  This may be used to achieve better locality or distribution of writes
+  in client applications.
+
+* The Java client library now supports enabling fault tolerance on scanners.
+  Fault tolerant scanners are able to transparently recover from concurrent
+  server crashes at the cost of some performance overhead. See the Java
+  API documentation for more details on usage.
+
+* The `kudu` command line tool now includes a new advanced administrative
+  command `kudu remote_replica unsafe_change_config`. This command may be used
+  to force a tablet to perform an unsafe change of its Raft replication
+  configuration. This can be used to recover from scenarios such as a loss
+  of a majority of replicas, at the risk of losing edits.
+
+* The `kudu` command line tool now includes the `kudu fs check` command
+  which performs various offline consistency checks on the local on-disk
+  storage of a Kudu Tablet Server or Master. In addition to detecting
+  various inconsistencies or corruptions, it can also detect and remove
+  data blocks that are no longer referenced by any tablet but were not
+  fully removed from disk due to a crash or a bug in prior versions of Kudu.
+
+* The `kudu` command line tool can now be used to list the addresses and
+  identifiers of the servers in the cluster using either `kudu master list`
+  or `kudu tserver list`.
+
+* Kudu 1.4 now includes the optional ability to compute, store, and verify
+  checksums on all pieces of data stored on a server. Prior versions only
+  performed checksums on certain portions of the stored data. This feature
+  is not enabled by default since it makes a backward-incompatible change
+  to the on-disk formats and thus prevent downgrades. Kudu 1.5 will enable
+  the feature by default.
 
 == Optimizations and improvements
 
+* `kudu cluster ksck` now detects and reports new classes of
+  inconsistencies and issues. In particular, it is better able to
+  detect cases where a configuration change such as a replica eviction
+  or addition is pending but is unable to be committed. It also now
+  properly detects and reports cases where a tablet has no elected
+  leader.
+
+* The default size for Write Ahead Log (WAL) segments has been reduced
+  from 64MB to 8MB. Additionally, in the case that all replicas of a
+  tablet are fully up to date and data has been flushed from memory,
+  servers will now retain only a single WAL segment rather than
+  two. These changes are expected to reduce the average consumption of
+  disk space on the configured WAL disk by 16x, as well as improve the
+  startup speed of tablet servers by reducing the number and size of
+  WAL segments that need to be re-read.
+
+* The default on-disk storage system used by Kudu servers (Log Block Manager)
+  has been improved to compact its metadata and remove dead containers.
+  This compaction and garbage collection occurs only at startup. Thus, the
+  first startup after upgrade is expected to be longer than usual, and
+  subsequent restarts should be shorter.
+
+* The usability of the Kudu web interfaces has been improved,
+  particularly for the case where a server hosts many tablets or a
+  table has many partitions. Pages that list tablets now include
+  a top-level summary of tablet status and show the complete list
+  under a toggleable section.
+
+* The Maintenance Manager has been improved to improve utilization of the
+  configured maintenance threads. Previously, maintenance work would
+  only be scheduled a maximum of 4 times per second, but now maintenance
+  work will be scheduled immediately whenever any configured thread is
+  available. This can improve the throughput of write-heavy workloads.
+
+* The Maintenance Manager will now aggressively schedule flushes of
+  in-memory data when memory consumption crosses 60% of the configured
+  process-wide memory limit. The backpressure mechanism which begins
+  to throttle client writes has been accordingly adjusted to not begin
+  throttling until reaching 80% of the configured limit. These two
+  changes together result in improved write throughput, more consistent
+  latency, and fewer timeouts due to memory exhaustion.
+
+* Many performance improvements were made to write performance. Applications
+  which send large batches of writes to Kudu should see substantially
+  improved throughput in Kudu 1.4.
+
+* Several improvements were made to reduce the memory consumption of
+  Kudu Tablet Servers which hold large volumes of data. The specific
+  amount of memory saved varies depending on workload, but the expectation
+  is that approximately 350MB of excess peak memory usage has been eliminated
+  per TB of data stored.
+
+* The number of threads used by the Kudu Tablet Server has been reduced.
+  Previously, each tablet used a dedicated thread to append to its WAL.
+  Those threads now automatically stop running if there is no activity
+  on a given tablet for a short period of time.
+
+[[rn_1.4.0_fixed_issues]]
+== Fixed Issues
+
+* link:https://issues.apache.org/jira/browse/KUDU-2020[KUDU-2020]
+  Fixed an issue where re-replication after a failure would proceed
+  significantly slower than expected. This bug caused many tablets
+  to be unnecessarily copied multiple times before successfully
+  being considered re-replicated, resulting in significantly more
+  network and IO bandwidth usage than expected. Mean time to recovery
+  on clusters with large amounts of data is improved by up to 10x by this
+  fix.
+
+* link:https://issues.apache.org/jira/browse/KUDU-1982[KUDU-1982]
+  Fixed an issue where the Java client would call `NetworkInterface.getByInetAddress`
+  very often, causing performance problems particularly on Windows
+  where this function can be quite slow.
+
+* link:https://issues.apache.org/jira/browse/KUDU-1755[KUDU-1755]
+  Improved the accuracy of the `on_disk_size` replica metrics to
+  include the size consumed by bloom filters, primary key indexes,
+  and superblock metadata, and delta files. Note that, because the size
+  metric is now more accurate, the reported values are expected to
+  increase after upgrading to Kudu 1.4. This does not indicate that
+  replicas are using more space after the upgrade; rather, it is
+  now accurately reporting the amount of space that has always been
+  used.
+
+* link:https://issues.apache.org/jira/browse/KUDU-1192[KUDU-1192]
+  Kudu servers will now periodically flush their log messages to disk
+  even if no `WARNING`-level messages have been logged. This makes it
+  easier to tail the logs to see progress output during normal startup.
+
+* link:https://issues.apache.org/jira/browse/KUDU-1999[KUDU-1999]
+  Fixed the ability to run Spark jobs in "cluster" mode against
+  Kudu clusters secured by Kerberos.
+
+
+[[rn_1.4.0_wire_compatibility]]
+== Wire Protocol compatibility
+
+Kudu 1.4.0 is wire-compatible with previous versions of Kudu:
+
+* Kudu 1.4 clients may connect to servers running Kudu 1.0 or later. If the client uses
+  features that are not available on the target server, an error will be returned.
+* Kudu 1.0 clients may connect to servers running Kudu 1.4 with the exception of the
+  below-mentioned restrictions regarding secure clusters.
+* Rolling upgrade between Kudu 1.3 and Kudu 1.4 servers is believed to be possible
+  though has not been sufficiently tested. Users are encouraged to shut down all nodes
+  in the cluster, upgrade the software, and then restart the daemons on the new version.
+
+The authentication features introduced in Kudu 1.3 place the following limitations
+on wire compatibility between Kudu 1.4 and versions earlier than 1.3:
+
+* If a Kudu 1.4 cluster is configured with authentication or encryption set to "required",
+  clients older than Kudu 1.3 will be unable to connect.
+* If a Kudu 1.4 cluster is configured with authentication and encryption set to "optional"
+  or "disabled", older clients will still be able to connect.
+
+[[rn_1.4.0_incompatible_changes]]
+== Incompatible Changes in Kudu 1.4.0
+
 * Kudu servers, by default, will now only allow unencrypted or unauthenticated connections
   from trusted subnets, which are private networks (127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,
   192.168.0.0/16,169.254.0.0/16) and local subnets of all local network interfaces.
@@ -49,20 +205,35 @@ The trusted subnets can be configured using the `--trusted_subnets` flag, which
    access. This can be mitigated if authentication and encryption are configured to be
    required.
 
-[[rn_1.4.0_fixed_issues]]
-== Fixed Issues
-
-
-[[rn_1.4.0_wire_compatibility]]
-== Wire Protocol compatibility
+[[rn_1.4.0_client_compatibility]]
 
+=== Client Library Compatibility
+* The Kudu 1.4 Java client library is API- and ABI-compatible with Kudu 1.3. Applications
+  written against Kudu 1.3 will compile and run against the Kudu 1.4 client library and
+  vice-versa, unless one of the following newly added APIs is used:
+** `[Async]KuduScannerBuilder.setFaultTolerant(...)`
+** New methods in `AlterTableOptions`: `removeDefault`, `changeDefault`, `changeDesiredBlockSize`,
+   `changeEncoding`, `changeCompressionAlgorithm`
+** `KuduClient.updateLastPropagatedTimestamp`
+** `KuduClient.getLastPropagatedTimestamp`
+** New getters in `PartialRow`: `getBoolean`, `getByte`, `getShort`, `getInt`, `getLong`,
+   `getFloat`, `getDouble`, `getString`, `getBinaryCopy`, `getBinary`, `isNull`,
+   `isSet`.
 
-[[rn_1.4.0_incompatible_changes]]
-== Incompatible Changes in Kudu 1.3.0
 
+* The Kudu 1.4 {cpp} client is API- and ABI-forward-compatible with Kudu 1.3.
+  Applications written and compiled against the Kudu 1.3 client library will run without
+  modification against the Kudu 1.4 client library. Applications written and compiled
+  against the Kudu 1.4 client library will run without modification against the Kudu 1.3
+  client library unless they use one of the following new APIs:
+** `KuduPartitionerBuilder`
+** `KuduPartitioner
+** `KuduScanner::SetRowFormatFlags` (unstable API)
+** `KuduScanBatch::direct_data`, `KuduScanBatch::indirect_data` (unstable API)
 
-[[rn_1.4.0_client_compatibility]]
-=== Client Library Compatibility
+* The Kudu 1.4 Python client is API-compatible with Kudu 1.3. Applications
+  written against Kudu 1.3 will continue to run against the Kudu 1.4 client
+  and vice-versa.
 
 
 [[rn_1.4.0_known_issues]]

[4/4] kudu git commit: codegen: fix regression in inlining after LLVM 4.0 upgrade

Posted by to...@apache.org.

codegen: fix regression in inlining after LLVM 4.0 upgrade

With the upgrade to LLVM 4.0, the performance of our generated code
regressed significantly. After looking at the generated assembly from
codegen-test, I could see that there were many 'call' instructions that
didn't appear in the earlier output, which led me to suspect the
inliner.

After adding a 'module->dump()' call and looking at the output, I could
see that the calls were to utility functions like BitmapTest() which
should be inlined due to their very small size. However, the function
was marked 'noinline' in precompiled.ll. After a bit of Googling I came
across a thread[1] in which someone else noticed that all of their
functions had unexpected 'noinline' attributes after upgrading to 4.0.

After following the breadcrumbs, I found some advice to change the way
in which we emit precompiled.ll to disable the built-in LLVM passes
which were responsible for adding this attribute.

This had one unintended side-effect which initially broke codegen in
TSAN builds (which use libc++ instead of libstdcxx). Fixing this required
changing the debug builds to still run the most basic ('-O0')
optimization passes at JIT time instead of skipping the optimization
pass manager entirely. Additionally I needed to explicitly run Global
Dead Code Elimination (GlobalDCE). See the new comments in
module_builder.cc for details.

That also caused the size of generated code in UBSAN builds to increase,
so I had to increase the limit on the number of dumped instructions per
function to avoid breaking one of the tests.

During my investigation, I also found that we should have been passing
the optimization and size levels into the inlining pass, so I threw
that change in for good measure.

This fixed the perf regression. I benchmarked using:
  memrowset-test --roundtrip_num_rows=10000000 \
                 --gtest_filter=\*InsertCount\* \
                 --gtest_repeat=10

LLVM 3.9 (before the regression) (asm at [2]):

  I0607 12:00:17.540612 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.295s    user 0.294s     sys 0.000s
  I0607 12:00:23.842226 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.297s    user 0.297s     sys 0.000s
  I0607 12:00:29.710149 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.296s    user 0.296s     sys 0.000s
  I0607 12:00:35.587321 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.296s    user 0.296s     sys 0.000s
  I0607 12:00:41.451495 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.296s    user 0.296s     sys 0.000s
  I0607 12:00:47.309710 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.297s    user 0.297s     sys 0.000s
  I0607 12:00:53.183537 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.297s    user 0.297s     sys 0.000s
  I0607 12:00:59.051522 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.297s    user 0.297s     sys 0.000s
  I0607 12:01:04.935763 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.297s    user 0.298s     sys 0.000s
  I0607 12:01:10.821828 23532 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.297s    user 0.299s     sys 0.000s

LLVM 4.0 (before this fix) (asm at [3]):

  I0607 12:14:07.771476 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.524s    user 0.525s     sys 0.000s
  I0607 12:14:14.283260 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.525s    user 0.525s     sys 0.000s
  I0607 12:14:20.376360 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.543s    user 0.543s     sys 0.000s
  I0607 12:14:26.457681 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.530s    user 0.530s     sys 0.000s
  I0607 12:14:32.535305 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.532s    user 0.532s     sys 0.000s
  I0607 12:14:38.615105 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.531s    user 0.532s     sys 0.000s
  I0607 12:14:44.697655 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.531s    user 0.531s     sys 0.000s
  I0607 12:14:50.779160 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.532s    user 0.532s     sys 0.000s
  I0607 12:14:56.852665 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.531s    user 0.530s     sys 0.000s
  I0607 12:15:02.954947 12117 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.538s    user 0.540s     sys 0.000s

LLVM 4.0 (after this fix) (asm at [4]):

  I0607 12:17:37.953876 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.292s      user 0.292s     sys 0.000s
  I0607 12:17:44.214912 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.295s      user 0.295s     sys 0.000s
  I0607 12:17:50.057200 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.299s      user 0.299s     sys 0.000s
  I0607 12:17:55.882935 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.292s      user 0.292s     sys 0.000s
  I0607 12:18:01.725445 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.293s      user 0.293s     sys 0.000s
  I0607 12:18:07.562417 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.293s      user 0.294s     sys 0.000s
  I0607 12:18:13.421221 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.293s      user 0.293s     sys 0.000s
  I0607 12:18:19.268613 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.293s      user 0.293s     sys 0.000s
  I0607 12:18:25.113490 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.293s      user 0.294s     sys 0.000s
  I0607 12:18:30.950954 17916 memrowset-test.cc:446] Time spent Scanning rows where all are committed: real 0.293s      user 0.294s     sys 0.000s

Looking at the assembly for the three tests above, it's clear that the
inlining is broken in 4.0 without this fix, and with it, the generated
assembly is a little better (shorter) than 3.9.

[1] http://lists.llvm.org/pipermail/llvm-dev/2017-May/113175.html
[2] https://gist.github.com/b4b0365ea1570f33d55c6c3dffd4a8af
[3] https://gist.github.com/128ecb292fddd069d5397a76dc53fc11
[4] https://gist.github.com/0cb01f454737d6c2dc457f0cdb6bb52d

Change-Id: I7e449df80e5cd02b9ce2dbf4daa5cf8151007dcf
Reviewed-on: http://gerrit.cloudera.org:8080/7099
Reviewed-by: Todd Lipcon <to...@apache.org>
Tested-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2d30f3a9
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2d30f3a9
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2d30f3a9

Branch: refs/heads/master
Commit: 2d30f3a96dd416e7991f552c9bf5189d178d8945
Parents: cf68624
Author: Todd Lipcon <to...@cloudera.com>
Authored: Tue Jun 6 16:54:54 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Thu Jun 8 20:28:08 2017 +0000

----------------------------------------------------------------------
 src/kudu/codegen/CMakeLists.txt    |  9 +++++++
 src/kudu/codegen/code_generator.cc |  2 +-
 src/kudu/codegen/codegen-test.cc   |  8 ++++++
 src/kudu/codegen/module_builder.cc | 46 +++++++++++++++++++++++++--------
 4 files changed, 53 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/2d30f3a9/src/kudu/codegen/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/codegen/CMakeLists.txt b/src/kudu/codegen/CMakeLists.txt
index 0357e38..275de47 100644
--- a/src/kudu/codegen/CMakeLists.txt
+++ b/src/kudu/codegen/CMakeLists.txt
@@ -117,6 +117,15 @@ list(REMOVE_ITEM IR_FLAGS "-fsanitize=thread" "-DTHREAD_SANITIZER")
 # again at runtime.
 list(REMOVE_ITEM IR_FLAGS "-O3" "-O2" "-O1" "-Os")
 
+# Disable built-in LLVM passes which would add 'noinline' attributes to all
+# standalone functions.
+#
+# This is per the advice in https://reviews.llvm.org/D28053#629914 which says:
+#
+#   "This will generate a frontend-optimized but backend pristine bitcode file that
+#    can be processed more or less depending on the desire of the user..."
+list(APPEND IR_FLAGS "-O" "-mllvm" "-disable-llvm-optzns")
+
 # We need a library which depends on the IR source, because CMake+Ninja
 # doesn't support IMPLICIT_DEPENDS in ADD_CUSTOM_COMMAND.
 #

http://git-wip-us.apache.org/repos/asf/kudu/blob/2d30f3a9/src/kudu/codegen/code_generator.cc
----------------------------------------------------------------------
diff --git a/src/kudu/codegen/code_generator.cc b/src/kudu/codegen/code_generator.cc
index 337321b..8be6388 100644
--- a/src/kudu/codegen/code_generator.cc
+++ b/src/kudu/codegen/code_generator.cc
@@ -218,7 +218,7 @@ Status CodeGenerator::CompileRowProjector(const Schema& base, const Schema& proj
   RETURN_NOT_OK(RowProjectorFunctions::Create(base, proj, out, &tm));
 
   if (FLAGS_codegen_dump_mc) {
-    static const int kInstrMax = 500;
+    static const int kInstrMax = 1500;
     std::ostringstream sstr;
     sstr << "Printing read projection function:\n";
     int instrs = DumpAsm((*out)->read(), *tm, &sstr, kInstrMax);

http://git-wip-us.apache.org/repos/asf/kudu/blob/2d30f3a9/src/kudu/codegen/codegen-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/codegen/codegen-test.cc b/src/kudu/codegen/codegen-test.cc
index dccf155..a63b064 100644
--- a/src/kudu/codegen/codegen-test.cc
+++ b/src/kudu/codegen/codegen-test.cc
@@ -258,6 +258,14 @@ TEST_F(CodegenTest, ObservablesTest) {
   ASSERT_EQ(iwith->is_identity(), iwithout.is_identity());
   ASSERT_TRUE(iwith->is_identity());
 }
+
+// Test empty projection
+TEST_F(CodegenTest, TestEmpty) {
+  Schema empty;
+  TestProjection<true>(&empty);
+  TestProjection<false>(&empty);
+}
+
 // Test key projection
 TEST_F(CodegenTest, TestKey) {
   Schema key = base_.CreateKeyProjection();

http://git-wip-us.apache.org/repos/asf/kudu/blob/2d30f3a9/src/kudu/codegen/module_builder.cc
----------------------------------------------------------------------
diff --git a/src/kudu/codegen/module_builder.cc b/src/kudu/codegen/module_builder.cc
index 9f825e6..61d9e13 100644
--- a/src/kudu/codegen/module_builder.cc
+++ b/src/kudu/codegen/module_builder.cc
@@ -41,6 +41,7 @@
 #include <llvm/Support/raw_os_ostream.h>
 #include <llvm/Support/SourceMgr.h>
 #include <llvm/Transforms/IPO.h>
+#include <llvm/Transforms/IPO/AlwaysInliner.h>
 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
 
 #include "kudu/codegen/precompiled.ll.h"
@@ -210,16 +211,33 @@ void ModuleBuilder::AddJITPromise(llvm::Function* llvm_f,
 
 namespace {
 
-#if CODEGEN_MODULE_BUILDER_DO_OPTIMIZATIONS
-
-void DoOptimizations(ExecutionEngine* engine,
-                     Module* module,
+void DoOptimizations(Module* module,
                      const unordered_set<string>& external_functions) {
   PassManagerBuilder pass_builder;
-  pass_builder.OptLevel = 2;
   // Don't optimize for code size (this corresponds to -O2/-O3)
   pass_builder.SizeLevel = 0;
-  pass_builder.Inliner = llvm::createFunctionInliningPass();
+#if CODEGEN_MODULE_BUILDER_DO_OPTIMIZATIONS
+  pass_builder.OptLevel = 2;
+  pass_builder.Inliner = llvm::createFunctionInliningPass(pass_builder.OptLevel,
+                                                          pass_builder.SizeLevel);
+#else
+  // Even if we don't want to do optimizations, we have to run the "AlwaysInliner" pass.
+  // This pass ensures that any functions marked 'always_inline' are inlined, but nothing
+  // else.
+  //
+  // If we don't, the following happens:
+  // - symbols in libc++ (eg _ZNKSt3__19basic_iosIcNS_11char_traitsIcEEE5rdbufEv) are
+  //   marked as __attribute__((always_inline)) in the header.
+  // - those symbols end up included with 'local' visibility in libc++.so, since the compiler
+  //   knows that all call sites should inline them.
+  // - if we don't run any inliner at all, then our generated code generates LLVM
+  //   'invoke' instructions to try to call these external functions, despite them
+  //   being marked 'always_inline'.
+  // - these 'invoke' instructions fail to link at runtime since they can't find the
+  //   dynamic symbol (due to its local visibility)
+  pass_builder.OptLevel = 0;
+  pass_builder.Inliner = llvm::createAlwaysInlinerLegacyPass();
+#endif
 
   FunctionPassManager fpm(module);
   pass_builder.populateFunctionPassManager(fpm);
@@ -239,6 +257,16 @@ void DoOptimizations(ExecutionEngine* engine,
   module_passes.add(llvm::createInternalizePass([&](const GlobalValue& v) {
     return ContainsKey(external_functions, v.getGlobalIdentifier());
   }));
+
+  // Run Global Dead Code Elimination.
+  //
+  // This is responsible for removing any unreferenced functions. This is
+  // important to do even in -O0 to workaround an issue we see when our generated
+  // functions are actually empty. In that case, for whatever reason (perhaps a bug in LLVM?)
+  // the compiled module would try to include versions of functions with calls to
+  // other functions marked "alwaysinline". The latter functions would not get linked
+  // in our compiled module, and then the module would fail to load.
+  module_passes.add(llvm::createGlobalDCEPass());
   pass_builder.populateModulePassManager(module_passes);
 
   // Same as above, the result here just indicates whether optimization made any changes.
@@ -246,8 +274,6 @@ void DoOptimizations(ExecutionEngine* engine,
   ignore_result(module_passes.run(*module));
 }
 
-#endif
-
 vector<string> GetHostCPUAttrs() {
   // LLVM's ExecutionEngine expects features to be enabled or disabled with a list
   // of strings like ["+feature1", "-feature2"].
@@ -288,9 +314,7 @@ Status ModuleBuilder::Compile(unique_ptr<ExecutionEngine>* out) {
   }
   module->setDataLayout(target_->createDataLayout());
 
-#if CODEGEN_MODULE_BUILDER_DO_OPTIMIZATIONS
-  DoOptimizations(local_engine.get(), module, GetFunctionNames());
-#endif
+  DoOptimizations(module, GetFunctionNames());
 
   // Compile the module
   local_engine->finalizeObject();