You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/04/29 06:50:46 UTC

[doris] branch master updated: [optimization](simd) optimize count_zero_num for ColumnNullable #19124

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d383f1f3d7 [optimization](simd) optimize count_zero_num for ColumnNullable #19124
d383f1f3d7 is described below

commit d383f1f3d7734ea260470fb288db18e43d57f78e
Author: zclllyybb <zh...@selectdb.com>
AuthorDate: Sat Apr 29 14:50:39 2023 +0800

    [optimization](simd) optimize count_zero_num for ColumnNullable #19124
---
 be/src/util/simd/bits.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h
index a38363d7ff..45f82b23ac 100644
--- a/be/src/util/simd/bits.h
+++ b/be/src/util/simd/bits.h
@@ -87,6 +87,36 @@ inline size_t count_zero_num(const int8_t* __restrict data, const uint8_t* __res
                              size_t size) {
     size_t num = 0;
     const int8_t* end = data + size;
+#if defined(__SSE2__) && defined(__POPCNT__)
+    const __m128i zero16 = _mm_setzero_si128();
+    const int8_t* end64 = data + (size / 64 * 64);
+
+    for (; data < end64; data += 64) {
+        num += __builtin_popcountll(
+                static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128(
+                        _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(data)),
+                                       zero16),
+                        _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map))))) |
+                (static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128(
+                         _mm_cmpeq_epi8(
+                                 _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 16)),
+                                 zero16),
+                         _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map + 16)))))
+                 << 16u) |
+                (static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128(
+                         _mm_cmpeq_epi8(
+                                 _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 32)),
+                                 zero16),
+                         _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map + 32)))))
+                 << 32u) |
+                (static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128(
+                         _mm_cmpeq_epi8(
+                                 _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 48)),
+                                 zero16),
+                         _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map + 48)))))
+                 << 48u));
+    }
+#endif
     for (; data < end; ++data, ++null_map) {
         num += ((*data == 0) | *null_map);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org