You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2020/04/09 04:02:54 UTC
[kudu] branch branch-1.12.x updated: columnar_serialization: fix optimized GCC build
This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch branch-1.12.x
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/branch-1.12.x by this push:
new f13a187 columnar_serialization: fix optimized GCC build
f13a187 is described below
commit f13a187d6d436a17429a3158c4058f966d12b831
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Tue Apr 7 16:42:20 2020 -0700
columnar_serialization: fix optimized GCC build
It seems that GCC with -O1 or higher is less lenient about the pointer
types passed as the first argument to the _mm256_i32gather_* intrinsics.
This was causing the Ubuntu 18 release build to fail.
I was able to reproduce the failure and verify the fix on my own
Ubuntu 18 box using a fastdebug build.
Change-Id: I6b69470b238dae7a33fdd6b44cb8be57a26501d7
Reviewed-on: http://gerrit.cloudera.org:8080/15680
Tested-by: Kudu Jenkins
Reviewed-by: Grant Henke <gr...@apache.org>
(cherry picked from commit b386b71b6733bae903f6a051376b49a727e8b3bb)
Reviewed-on: http://gerrit.cloudera.org:8080/15682
Reviewed-by: Hao Hao <ha...@cloudera.com>
Tested-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/common/columnar_serialization.cc | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/kudu/common/columnar_serialization.cc b/src/kudu/common/columnar_serialization.cc
index eaa8469..f6b289e 100644
--- a/src/kudu/common/columnar_serialization.cc
+++ b/src/kudu/common/columnar_serialization.cc
@@ -356,7 +356,9 @@ int CopySelectedRowsAvx<4>(
// since the 'gather' instructions don't support 16-bit indexes.
__m256i indexes = _mm256_cvtepu16_epi32(*reinterpret_cast<const __m128i*>(sel_rows));
// Gather 8x32-bit elements from src_buf[index*sizeof_type] for each index.
- __m256i elems = _mm256_i32gather_epi32(src_buf, indexes, sizeof_type);
+ // We need this cast to compile on some versions of GCC.
+ const auto* src_i32 = reinterpret_cast<const int32_t*>(src_buf);
+ __m256i elems = _mm256_i32gather_epi32(src_i32, indexes, sizeof_type);
// Store the 8x32-bit elements into the destination.
_mm256_storeu_si256(reinterpret_cast<__m256i*>(dst_buf), elems);
dst_buf += ints_per_vector * sizeof_type;
@@ -384,7 +386,8 @@ int CopySelectedRowsAvx<8>(
sel_rows[1],
sel_rows[0]);
// Load 4x64-bit integers from src_buf[index * sizeof_type] for each index.
- __m256i elems = _mm256_i32gather_epi64(src_buf, indexes, sizeof_type);
+ const auto* src_lli = reinterpret_cast<const long long int*>(src_buf); // NOLINT(*)
+ __m256i elems = _mm256_i32gather_epi64(src_lli, indexes, sizeof_type);
// Store the 4x64-bit integers in the destination.
_mm256_storeu_si256(reinterpret_cast<__m256i*>(dst_buf), elems);
dst_buf += ints_per_vector * sizeof_type;