You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2020/04/08 00:43:30 UTC
[kudu] branch master updated: columnar_serialization: fix optimized GCC build

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new b386b71  columnar_serialization: fix optimized GCC build
b386b71 is described below

commit b386b71b6733bae903f6a051376b49a727e8b3bb
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Tue Apr 7 16:42:20 2020 -0700

    columnar_serialization: fix optimized GCC build
    
    It seems that GCC with -O1 or higher is less lenient about the pointer
    types passed as the first argument to the _mm256_i32gather_* intrinsics.
    This was causing the Ubuntu 18 release build to fail.
    
    I was able to reproduce the failure, and verify the fix, on my own
    Ubuntu 18 box using a fastdebug build.
    
    Change-Id: I6b69470b238dae7a33fdd6b44cb8be57a26501d7
    Reviewed-on: http://gerrit.cloudera.org:8080/15680
    Tested-by: Kudu Jenkins
    Reviewed-by: Grant Henke <gr...@apache.org>
---
 src/kudu/common/columnar_serialization.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/kudu/common/columnar_serialization.cc b/src/kudu/common/columnar_serialization.cc
index eaa8469..f6b289e 100644
--- a/src/kudu/common/columnar_serialization.cc
+++ b/src/kudu/common/columnar_serialization.cc
@@ -356,7 +356,9 @@ int CopySelectedRowsAvx<4>(
     // since the 'gather' instructions don't support 16-bit indexes.
     __m256i indexes = _mm256_cvtepu16_epi32(*reinterpret_cast<const __m128i*>(sel_rows));
     // Gather 8x32-bit elements from src_buf[index*sizeof_type] for each index.
-    __m256i elems = _mm256_i32gather_epi32(src_buf, indexes, sizeof_type);
+    // We need this cast to compile on some versions of GCC.
+    const auto* src_i32 = reinterpret_cast<const int32_t*>(src_buf);
+    __m256i elems = _mm256_i32gather_epi32(src_i32, indexes, sizeof_type);
     // Store the 8x32-bit elements into the destination.
     _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst_buf), elems);
     dst_buf += ints_per_vector * sizeof_type;
@@ -384,7 +386,8 @@ int CopySelectedRowsAvx<8>(
                                     sel_rows[1],
                                     sel_rows[0]);
     // Load 4x64-bit integers from src_buf[index * sizeof_type] for each index.
-    __m256i elems = _mm256_i32gather_epi64(src_buf, indexes, sizeof_type);
+    const auto* src_lli = reinterpret_cast<const long long int*>(src_buf); // NOLINT(*)
+    __m256i elems = _mm256_i32gather_epi64(src_lli, indexes, sizeof_type);
     // Store the 4x64-bit integers in the destination.
     _mm256_storeu_si256(reinterpret_cast<__m256i*>(dst_buf), elems);
     dst_buf += ints_per_vector * sizeof_type;