You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by "github-actions[bot] (via GitHub)" <gi...@apache.org> on 2023/04/27 12:28:22 UTC

[GitHub] [doris] github-actions[bot] commented on a diff in pull request #19172: add bit unpacking SIMD implementation

github-actions[bot] commented on code in PR #19172:
URL: https://github.com/apache/doris/pull/19172#discussion_r1179075654


##########
be/src/util/bit_packing.inline.h:
##########
@@ -98,6 +212,27 @@ std::pair<const uint8_t*, int64_t> BitPacking::UnpackValues(const uint8_t* __res
     return std::make_pair(in_pos, values_to_read);
 }
 
+template <typename T>
+constexpr bool IsSupportedUnpackingType() {

Review Comment:
   warning: redefinition of 'IsSupportedUnpackingType' [clang-diagnostic-error]
   ```cpp
   constexpr bool IsSupportedUnpackingType() {
                  ^
   ```
   **be/src/util/bit_packing.inline.h:159:** previous definition is here
   ```cpp
   constexpr bool IsSupportedUnpackingType() {
                  ^
   ```
   



##########
be/src/util/bitpacking/unpack_16u.h:
##########
@@ -0,0 +1,1252 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "extend_16u.h"
+#include "unpack_def.h"
+
// Lookup tables driving the AVX-512 unpack kernels below. For each bit
// width N (9..15), permutex_idx_table_Nu_{0,1} select the packed source
// 16-bit words that contain the even-/odd-indexed output elements, and
// shift_table_Nu_{0,1} hold the per-dword right/left shift amounts that
// align each N-bit element to the start of its word.
// NOTE(review): `static` arrays in a header get a separate copy in every
// translation unit that includes it; `inline constexpr` would share one
// definition -- confirm OWN_ALIGNED_64_ARRAY permits that.
// ------------------------------------ 9u -----------------------------------------
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_9u_0[32]) = {
        0u, 1u,  1u,  2u,  2u,  3u,  3u,  4u,  4u,  5u,  5u,  6u,  6u,  7u,  7u,  8u,
        9u, 10u, 10u, 11u, 11u, 12u, 12u, 13u, 13u, 14u, 14u, 15u, 15u, 16u, 16u, 17u};
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_9u_1[32]) = {
        0u, 1u,  1u,  2u,  2u,  3u,  3u,  4u,  5u,  6u,  6u,  7u,  7u,  8u,  8u,  9u,
        9u, 10u, 10u, 11u, 11u, 12u, 12u, 13u, 14u, 15u, 15u, 16u, 16u, 17u, 17u, 18u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_9u_0[16]) = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
                                                              0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_9u_1[16]) = {7u, 5u, 3u, 1u, 15u, 13u, 11u, 9u,
                                                              7u, 5u, 3u, 1u, 15u, 13u, 11u, 9u};

// ------------------------------------ 10u -----------------------------------------
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_10u_0[32]) = {
        0u,  1u,  1u,  2u,  2u,  3u,  3u,  4u,  5u,  6u,  6u,  7u,  7u,  8u,  8u,  9u,
        10u, 11u, 11u, 12u, 12u, 13u, 13u, 14u, 15u, 16u, 16u, 17u, 17u, 18u, 18u, 19u};
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_10u_1[32]) = {
        0u,  1u,  1u,  2u,  3u,  4u,  4u,  5u,  5u,  6u,  6u,  7u,  8u,  9u,  9u,  10u,
        10u, 11u, 11u, 12u, 13u, 14u, 14u, 15u, 15u, 16u, 16u, 17u, 18u, 19u, 19u, 20u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_10u_0[16]) = {0u, 4u, 8u, 12u, 0u, 4u, 8u, 12u,
                                                               0u, 4u, 8u, 12u, 0u, 4u, 8u, 12u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_10u_1[16]) = {6u, 2u, 14u, 10u, 6u, 2u, 14u, 10u,
                                                               6u, 2u, 14u, 10u, 6u, 2u, 14u, 10u};

// ------------------------------------ 11u -----------------------------------------
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_11u_0[32]) = {
        0u,  1u,  1u,  2u,  2u,  3u,  4u,  5u,  5u,  6u,  6u,  7u,  8u,  9u,  9u,  10u,
        11u, 12u, 12u, 13u, 13u, 14u, 15u, 16u, 16u, 17u, 17u, 18u, 19u, 20u, 20u, 21u};
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_11u_1[32]) = {
        0u,  1u,  2u,  3u,  3u,  4u,  4u,  5u,  6u,  7u,  7u,  8u,  8u,  9u,  10u, 11u,
        11u, 12u, 13u, 14u, 14u, 15u, 15u, 16u, 17u, 18u, 18u, 19u, 19u, 20u, 21u, 22u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_11u_0[16]) = {0u, 6u, 12u, 2u, 8u, 14u, 4u, 10u,
                                                               0u, 6u, 12u, 2u, 8u, 14u, 4u, 10u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_11u_1[16]) = {5u, 15u, 9u, 3u, 13u, 7u, 1u, 11u,
                                                               5u, 15u, 9u, 3u, 13u, 7u, 1u, 11u};

// ------------------------------------ 12u -----------------------------------------
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_12u_0[32]) = {
        0u,  1u,  1u,  2u,  3u,  4u,  4u,  5u,  6u,  7u,  7u,  8u,  9u,  10u, 10u, 11u,
        12u, 13u, 13u, 14u, 15u, 16u, 16u, 17u, 18u, 19u, 19u, 20u, 21u, 22u, 22u, 23u};
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_12u_1[32]) = {
        0u,  1u,  2u,  3u,  3u,  4u,  5u,  6u,  6u,  7u,  8u,  9u,  9u,  10u, 11u, 12u,
        12u, 13u, 14u, 15u, 15u, 16u, 17u, 18u, 18u, 19u, 20u, 21u, 21u, 22u, 23u, 24u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_12u_0[16]) = {0u, 8u, 0u, 8u, 0u, 8u, 0u, 8u,
                                                               0u, 8u, 0u, 8u, 0u, 8u, 0u, 8u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_12u_1[16]) = {4u, 12u, 4u, 12u, 4u, 12u, 4u, 12u,
                                                               4u, 12u, 4u, 12u, 4u, 12u, 4u, 12u};

// ------------------------------------ 13u -----------------------------------------
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_13u_0[32]) = {
        0u,  1u,  1u,  2u,  3u,  4u,  4u,  5u,  6u,  7u,  8u,  9u,  9u,  10u, 11u, 12u,
        13u, 14u, 14u, 15u, 16u, 17u, 17u, 18u, 19u, 20u, 21u, 22u, 22u, 23u, 24u, 25u};
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_13u_1[32]) = {
        0u,  1u,  2u,  3u,  4u,  5u,  5u,  6u,  7u,  8u,  8u,  9u,  10u, 11u, 12u, 13u,
        13u, 14u, 15u, 16u, 17u, 18u, 18u, 19u, 20u, 21u, 21u, 22u, 23u, 24u, 25u, 26u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_13u_0[16]) = {0u, 10u, 4u, 14u, 8u, 2u, 12u, 6u,
                                                               0u, 10u, 4u, 14u, 8u, 2u, 12u, 6u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_13u_1[16]) = {3u, 9u, 15u, 5u, 11u, 1u, 7u, 13u,
                                                               3u, 9u, 15u, 5u, 11u, 1u, 7u, 13u};

// ------------------------------------ 14u -----------------------------------------
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_14u_0[32]) = {
        0u,  1u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  8u,  9u,  10u, 11u, 12u, 13u,
        14u, 15u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 22u, 23u, 24u, 25u, 26u, 27u};
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_14u_1[32]) = {
        0u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  7u,  8u,  9u,  10u, 11u, 12u, 13u, 14u,
        14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, 28u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_14u_0[16]) = {0u, 12u, 8u, 4u, 0u, 12u, 8u, 4u,
                                                               0u, 12u, 8u, 4u, 0u, 12u, 8u, 4u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_14u_1[16]) = {2u, 6u, 10u, 14u, 2u, 6u, 10u, 14u,
                                                               2u, 6u, 10u, 14u, 2u, 6u, 10u, 14u};

// ------------------------------------ 15u -----------------------------------------
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_15u_0[32]) = {
        0u,  1u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  9u,  10u, 11u, 12u, 13u, 14u,
        15u, 16u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, 28u, 29u};
OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_15u_1[32]) = {
        0u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  9u,  10u, 11u, 12u, 13u, 14u, 15u,
        15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, 28u, 29u, 30u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_15u_0[16]) = {0u, 14u, 12u, 10u, 8u, 6u, 4u, 2u,
                                                               0u, 14u, 12u, 10u, 8u, 6u, 4u, 2u};
OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_15u_1[16]) = {1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u,
                                                               1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
+
// Scalar (non-SIMD) fallback: unpacks `values_to_read` values of `bit_width`
// bits each from a stream packed in 16-bit words into `dst_ptr`, returning
// the advanced source pointer. Specialized per output width below; used by
// the SIMD kernels for tails of fewer than 32 values.
template <typename OutType>
const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width, OutType* dst_ptr);
+
+template <>
+inline const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint16_t* dst_ptr) {
+    uint32_t mask = OWN_BIT_MASK(bit_width);
+    uint32_t next_word;
+    uint32_t bits_in_buf = OWN_WORD_WIDTH;
+    uint16_t* src16u_ptr = (uint16_t*)src_ptr;
+    uint16_t* dst16u_ptr = (uint16_t*)dst_ptr;
+    uint32_t src = (uint32_t)(*src16u_ptr);
+    src16u_ptr++;
+
+    while (1u < values_to_read) {
+        if (bit_width > bits_in_buf) {
+            next_word = (uint32_t)(*src16u_ptr);
+            src16u_ptr++;
+            next_word = next_word << bits_in_buf;
+            src = src | next_word;
+            bits_in_buf += OWN_WORD_WIDTH;
+        }
+        *dst16u_ptr = (uint16_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst16u_ptr++;
+        values_to_read--;
+    }
+
+    if (bit_width > bits_in_buf) {
+        next_word = (uint32_t)(bit_width - bits_in_buf > 8u ? *src16u_ptr : *((uint8_t*)src16u_ptr));
+        next_word = next_word << bits_in_buf;
+        src = src | next_word;
+    }
+    *dst16u_ptr = (uint16_t)(src & mask);
+    return (uint8_t*)(src16u_ptr);
+}
+
+template <>
+inline const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint32_t* dst_ptr) {
+    printf("=== test wwq unpack_Nu16u ===\n");
+    uint32_t mask = OWN_BIT_MASK(bit_width);
+    uint32_t next_word;
+    uint32_t bits_in_buf = OWN_WORD_WIDTH;
+    uint16_t* src16u_ptr = (uint16_t*)src_ptr;
+    uint32_t* dst32u_ptr = (uint32_t*)dst_ptr;
+    uint32_t src = (uint32_t)(*src16u_ptr);
+    src16u_ptr++;
+
+    while (1u < values_to_read) {
+        if (bit_width > bits_in_buf) {
+            next_word = (uint32_t)(*src16u_ptr);
+            src16u_ptr++;
+            next_word = next_word << bits_in_buf;
+            src = src | next_word;
+            bits_in_buf += OWN_WORD_WIDTH;
+        }
+        *dst32u_ptr = (uint32_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst32u_ptr++;
+        values_to_read--;
+    }
+
+    if (bit_width > bits_in_buf) {
+        next_word = (uint32_t)(bit_width - bits_in_buf > 8u ? *src16u_ptr : *((uint8_t*)src16u_ptr));
+        next_word = next_word << bits_in_buf;
+        src = src | next_word;
+    }
+    *dst32u_ptr = (uint32_t)(src & mask);
+    return (uint8_t*)(src16u_ptr);
+}
+
+template <>
+inline const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint64_t* dst_ptr) {
+    uint32_t mask = OWN_BIT_MASK(bit_width);
+    uint32_t next_word;
+    uint32_t bits_in_buf = OWN_WORD_WIDTH;
+    uint16_t* src16u_ptr = (uint16_t*)src_ptr;
+    uint64_t* dst64u_ptr = (uint64_t*)dst_ptr;
+    uint32_t src = (uint32_t)(*src16u_ptr);
+    src16u_ptr++;
+
+    while (1u < values_to_read) {
+        if (bit_width > bits_in_buf) {
+            next_word = (uint32_t)(*src16u_ptr);
+            src16u_ptr++;
+            next_word = next_word << bits_in_buf;
+            src = src | next_word;
+            bits_in_buf += OWN_WORD_WIDTH;
+        }
+        *dst64u_ptr = (uint64_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst64u_ptr++;
+        values_to_read--;
+    }
+
+    if (bit_width > bits_in_buf) {
+        next_word = (uint32_t)(bit_width - bits_in_buf > 8u ? *src16u_ptr : *((uint8_t*)src16u_ptr));
+        next_word = next_word << bits_in_buf;
+        src = src | next_word;
+    }
+    *dst64u_ptr = (uint64_t)(src & mask);
+    return (uint8_t*)(src16u_ptr);
+}
+
// Unpacks `values_to_read` 9-bit values from `src_ptr` into `dst_ptr` and
// returns the advanced source pointer. The AVX-512 path handles 32 values
// per iteration; unpack_Nu16u handles the remaining tail.
template <typename OutType>
const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t output: a 9-bit value cannot fit in 8 bits, so this stub does
// nothing. NOTE(review): it silently ignores its input instead of failing;
// confirm callers can never dispatch here.
template <>
inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}

// 9-bit -> uint16_t: stores 32 unpacked words per SIMD iteration.
template <>
inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(9u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(9u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_9u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_9u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_9u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_9u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            // 32 values * 9 bits = 288 bits = 36 bytes consumed
            src_ptr += 4u * 9u;
            dst_ptr += 32u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 9u, dst_ptr);
    }
    return src_ptr;
}

// 9-bit -> uint32_t: same kernel, widened on store via extend_16u32u.
template <>
inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(9u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(9u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_9u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_9u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_9u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_9u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widening store; extend_16u32u advances dst_ptr
            extend_16u32u(zmm[0], dst_ptr);

            src_ptr += 4u * 9u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 9u, dst_ptr);
    }
    return src_ptr;
}

// 9-bit -> uint64_t: same kernel, widened on store via extend_16u64u.
template <>
inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(9u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(9u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_9u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_9u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_9u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_9u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widening store; extend_16u64u advances dst_ptr
            extend_16u64u(zmm[0], dst_ptr);

            src_ptr += 4u * 9u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 9u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 10-bit values from `src_ptr` into `dst_ptr` and
// returns the advanced source pointer. The AVX-512 path handles 32 values
// per iteration; unpack_Nu16u handles the remaining tail.
template <typename OutType>
const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t output: a 10-bit value cannot fit in 8 bits, so this stub does
// nothing. NOTE(review): it silently ignores its input instead of failing;
// confirm callers can never dispatch here.
template <>
inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}

// 10-bit -> uint16_t: stores 32 unpacked words per SIMD iteration.
template <>
inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(10u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(10u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_10u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_10u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_10u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_10u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            // 32 values * 10 bits = 320 bits = 40 bytes consumed
            src_ptr += 4u * 10u;
            dst_ptr += 32u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 10u, dst_ptr);
    }
    return src_ptr;
}

// 10-bit -> uint32_t: same kernel, widened on store via extend_16u32u.
template <>
inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(10u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(10u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_10u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_10u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_10u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_10u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widening store; extend_16u32u advances dst_ptr
            extend_16u32u(zmm[0], dst_ptr);

            src_ptr += 4u * 10u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 10u, dst_ptr);
    }
    return src_ptr;
}

// 10-bit -> uint64_t: same kernel, widened on store via extend_16u64u.
template <>
inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(10u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(10u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_10u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_10u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_10u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_10u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widening store; extend_16u64u advances dst_ptr
            extend_16u64u(zmm[0], dst_ptr);

            src_ptr += 4u * 10u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 10u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 11-bit values from `src_ptr` into `dst_ptr` and
// returns the advanced source pointer. The AVX-512 path handles 32 values
// per iteration; unpack_Nu16u handles the remaining tail.
template <typename OutType>
const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t output: an 11-bit value cannot fit in 8 bits, so this stub does
// nothing. NOTE(review): it silently ignores its input instead of failing;
// confirm callers can never dispatch here.
template <>
inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}

// 11-bit -> uint16_t: stores 32 unpacked words per SIMD iteration.
template <>
inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(11u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(11u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_11u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_11u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_11u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_11u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            // 32 values * 11 bits = 352 bits = 44 bytes consumed
            src_ptr += 4u * 11u;
            dst_ptr += 32u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 11u, dst_ptr);
    }
    return src_ptr;
}

// 11-bit -> uint32_t: same kernel, widened on store via extend_16u32u.
template <>
inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(11u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(11u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_11u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_11u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_11u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_11u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widening store; extend_16u32u advances dst_ptr
            extend_16u32u(zmm[0], dst_ptr);

            src_ptr += 4u * 11u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 11u, dst_ptr);
    }
    return src_ptr;
}

// 11-bit -> uint64_t: same kernel, widened on store via extend_16u64u.
template <>
inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(11u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(11u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_11u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_11u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_11u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_11u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widening store; extend_16u64u advances dst_ptr
            extend_16u64u(zmm[0], dst_ptr);

            src_ptr += 4u * 11u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 11u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 12-bit values from `src_ptr` into `dst_ptr` and
// returns the advanced source pointer. The AVX-512 path handles 32 values
// per iteration; unpack_Nu16u handles the remaining tail.
template <typename OutType>
const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t output: a 12-bit value cannot fit in 8 bits, so this stub does
// nothing. NOTE(review): it silently ignores its input instead of failing;
// confirm callers can never dispatch here.
template <>
inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}

// 12-bit -> uint16_t: stores 32 unpacked words per SIMD iteration.
template <>
inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(12u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(12u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_12u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_12u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_12u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_12u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            // 32 values * 12 bits = 384 bits = 48 bytes consumed
            src_ptr += 4u * 12u;
            dst_ptr += 32u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 12u, dst_ptr);
    }
    return src_ptr;
}

// 12-bit -> uint32_t: same kernel, widened on store via extend_16u32u.
template <>
inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // Masked load so only the words holding 32 packed values are read.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(12u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(12u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_12u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_12u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_12u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_12u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widening store; extend_16u32u advances dst_ptr
            extend_16u32u(zmm[0], dst_ptr);

            src_ptr += 4u * 12u;
            values_to_read -= 32u;
        }
    }

    // Scalar tail for the remaining (< 32) values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 12u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 12-bit packed values into uint64_t outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 12 = 48 input
// bytes); any remaining tail (< 32 values) falls back to the scalar
// unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 12 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(12u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(12u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_12u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_12u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_12u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_12u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widen the 32 unpacked 16-bit values to uint64_t outputs.
            // NOTE(review): dst_ptr is not advanced in this loop, so
            // extend_16u64u must take it by reference and advance it —
            // confirm against extend_16u.h.
            extend_16u64u(zmm[0], dst_ptr);

            src_ptr += 4u * 12u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 12u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks 13-bit packed values; specialized per output width below.
template <typename OutType>
const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// 13-bit values cannot fit in a uint8_t output. This specialization exists
// only so the template dispatch compiles for every output type; it writes
// nothing and consumes no input. Callers must never select it for widths > 8.
template <>
inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpacks `values_to_read` 13-bit packed values into 16-bit outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 13 = 52 input
// bytes) and stores 32 uint16_t results; any remaining tail (< 32 values)
// falls back to the scalar unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 13 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(13u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(13u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_13u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_13u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_13u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_13u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 4u * 13u;
            dst_ptr += 32u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 13u, dst_ptr);
    }
    return src_ptr;
}
+
+template <>
+inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    printf("=== test wwq unpack_13u16u ===\n");
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(13u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(13u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_13u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_13u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_13u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_13u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 13u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 13u, dst_ptr);
+    }
+    return src_ptr;
+}
+
// Unpacks `values_to_read` 13-bit packed values into uint64_t outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 13 = 52 input
// bytes); any remaining tail (< 32 values) falls back to the scalar
// unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 13 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(13u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(13u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_13u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_13u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_13u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_13u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widen the 32 unpacked 16-bit values to uint64_t outputs.
            // NOTE(review): dst_ptr is not advanced in this loop, so
            // extend_16u64u must take it by reference and advance it —
            // confirm against extend_16u.h.
            extend_16u64u(zmm[0], dst_ptr);

            src_ptr += 4u * 13u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 13u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks 14-bit packed values; specialized per output width below.
template <typename OutType>
const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// 14-bit values cannot fit in a uint8_t output. This specialization exists
// only so the template dispatch compiles for every output type; it writes
// nothing and consumes no input. Callers must never select it for widths > 8.
template <>
inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpacks `values_to_read` 14-bit packed values into 16-bit outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 14 = 56 input
// bytes) and stores 32 uint16_t results; any remaining tail (< 32 values)
// falls back to the scalar unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 14 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(14u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(14u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_14u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_14u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_14u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_14u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 4u * 14u;
            dst_ptr += 32u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 14u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 14-bit packed values into uint32_t outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 14 = 56 input
// bytes); any remaining tail (< 32 values) falls back to the scalar
// unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 14 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(14u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(14u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_14u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_14u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_14u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_14u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widen the 32 unpacked 16-bit values to uint32_t outputs.
            // NOTE(review): dst_ptr is not advanced in this loop, so
            // extend_16u32u must take it by reference and advance it —
            // confirm against extend_16u.h.
            extend_16u32u(zmm[0], dst_ptr);

            src_ptr += 4u * 14u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 14u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 14-bit packed values into uint64_t outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 14 = 56 input
// bytes); any remaining tail (< 32 values) falls back to the scalar
// unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 14 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(14u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(14u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_14u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_14u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_14u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_14u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widen the 32 unpacked 16-bit values to uint64_t outputs.
            // NOTE(review): dst_ptr is not advanced in this loop, so
            // extend_16u64u must take it by reference and advance it —
            // confirm against extend_16u.h.
            extend_16u64u(zmm[0], dst_ptr);

            src_ptr += 4u * 14u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 14u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks 15-bit packed values; specialized per output width below.
template <typename OutType>
const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// 15-bit values cannot fit in a uint8_t output. This specialization exists
// only so the template dispatch compiles for every output type; it writes
// nothing and consumes no input. Callers must never select it for widths > 8.
template <>
inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpacks `values_to_read` 15-bit packed values into 16-bit outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 15 = 60 input
// bytes) and stores 32 uint16_t results; any remaining tail (< 32 values)
// falls back to the scalar unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 15 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(15u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(15u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_15u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_15u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_15u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_15u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 4u * 15u;
            dst_ptr += 32u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 15u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 15-bit packed values into uint32_t outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 15 = 60 input
// bytes); any remaining tail (< 32 values) falls back to the scalar
// unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 15 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(15u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(15u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_15u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_15u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_15u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_15u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widen the 32 unpacked 16-bit values to uint32_t outputs.
            // NOTE(review): dst_ptr is not advanced in this loop, so
            // extend_16u32u must take it by reference and advance it —
            // confirm against extend_16u.h.
            extend_16u32u(zmm[0], dst_ptr);

            src_ptr += 4u * 15u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 15u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks `values_to_read` 15-bit packed values into uint64_t outputs.
// Fast path: each AVX-512 iteration consumes 32 values (4 * 15 = 60 input
// bytes); any remaining tail (< 32 values) falls back to the scalar
// unpack_Nu16u. Returns the advanced source pointer.
template <>
inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 32u) {
        // read_mask loads exactly the packed input words needed per iteration;
        // parse_mask0 keeps the low 15 bits of every 16-bit lane.
        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(15u * OWN_DWORD_WIDTH));
        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(15u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_15u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_15u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_15u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_15u_1);

        while (values_to_read >= 32u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together
            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // widen the 32 unpacked 16-bit values to uint64_t outputs.
            // NOTE(review): dst_ptr is not advanced in this loop, so
            // extend_16u64u must take it by reference and advance it —
            // confirm against extend_16u.h.
            extend_16u64u(zmm[0], dst_ptr);

            src_ptr += 4u * 15u;
            values_to_read -= 32u;
        }
    }

    // scalar fallback for the remaining (< 32) values
    if (values_to_read > 0) {
        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 15u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpacks 16-bit (full-word) values; specialized per output width below.
template <typename OutType>
const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// 16-bit values cannot fit in a uint8_t output. This specialization exists
// only so the template dispatch compiles for every output type; it writes
// nothing and consumes no input. Callers must never select it for widths > 8.
template <>
inline const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
+template <>
+inline const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {

Review Comment:
   warning: pointer parameter 'dst_ptr' can be pointer to const [readability-non-const-parameter]
   
   be/src/util/bitpacking/unpack_16u.h:1199:
   ```diff
   - const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
   + const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, const OutType* dst_ptr);
   ```
   
   ```suggestion
   inline const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, const uint16_t* dst_ptr) {
   ```
   



##########
be/src/util/bitpacking/unpack_16u.h:
##########
@@ -0,0 +1,1252 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "extend_16u.h"
+#include "unpack_def.h"
+
+// ------------------------------------ 9u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_9u_0[32]) = {
+        0u, 1u,  1u,  2u,  2u,  3u,  3u,  4u,  4u,  5u,  5u,  6u,  6u,  7u,  7u,  8u,
+        9u, 10u, 10u, 11u, 11u, 12u, 12u, 13u, 13u, 14u, 14u, 15u, 15u, 16u, 16u, 17u};
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_9u_1[32]) = {
+        0u, 1u,  1u,  2u,  2u,  3u,  3u,  4u,  5u,  6u,  6u,  7u,  7u,  8u,  8u,  9u,
+        9u, 10u, 10u, 11u, 11u, 12u, 12u, 13u, 14u, 15u, 15u, 16u, 16u, 17u, 17u, 18u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_9u_0[16]) = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
+                                                              0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_9u_1[16]) = {7u, 5u, 3u, 1u, 15u, 13u, 11u, 9u,
+                                                              7u, 5u, 3u, 1u, 15u, 13u, 11u, 9u};
+
+// ------------------------------------ 10u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_10u_0[32]) = {
+        0u,  1u,  1u,  2u,  2u,  3u,  3u,  4u,  5u,  6u,  6u,  7u,  7u,  8u,  8u,  9u,
+        10u, 11u, 11u, 12u, 12u, 13u, 13u, 14u, 15u, 16u, 16u, 17u, 17u, 18u, 18u, 19u};
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_10u_1[32]) = {
+        0u,  1u,  1u,  2u,  3u,  4u,  4u,  5u,  5u,  6u,  6u,  7u,  8u,  9u,  9u,  10u,
+        10u, 11u, 11u, 12u, 13u, 14u, 14u, 15u, 15u, 16u, 16u, 17u, 18u, 19u, 19u, 20u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_10u_0[16]) = {0u, 4u, 8u, 12u, 0u, 4u, 8u, 12u,
+                                                               0u, 4u, 8u, 12u, 0u, 4u, 8u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_10u_1[16]) = {6u, 2u, 14u, 10u, 6u, 2u, 14u, 10u,
+                                                               6u, 2u, 14u, 10u, 6u, 2u, 14u, 10u};
+
+// ------------------------------------ 11u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_11u_0[32]) = {
+        0u,  1u,  1u,  2u,  2u,  3u,  4u,  5u,  5u,  6u,  6u,  7u,  8u,  9u,  9u,  10u,
+        11u, 12u, 12u, 13u, 13u, 14u, 15u, 16u, 16u, 17u, 17u, 18u, 19u, 20u, 20u, 21u};
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_11u_1[32]) = {
+        0u,  1u,  2u,  3u,  3u,  4u,  4u,  5u,  6u,  7u,  7u,  8u,  8u,  9u,  10u, 11u,
+        11u, 12u, 13u, 14u, 14u, 15u, 15u, 16u, 17u, 18u, 18u, 19u, 19u, 20u, 21u, 22u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_11u_0[16]) = {0u, 6u, 12u, 2u, 8u, 14u, 4u, 10u,
+                                                               0u, 6u, 12u, 2u, 8u, 14u, 4u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_11u_1[16]) = {5u, 15u, 9u, 3u, 13u, 7u, 1u, 11u,
+                                                               5u, 15u, 9u, 3u, 13u, 7u, 1u, 11u};
+
+// ------------------------------------ 12u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_12u_0[32]) = {
+        0u,  1u,  1u,  2u,  3u,  4u,  4u,  5u,  6u,  7u,  7u,  8u,  9u,  10u, 10u, 11u,
+        12u, 13u, 13u, 14u, 15u, 16u, 16u, 17u, 18u, 19u, 19u, 20u, 21u, 22u, 22u, 23u};
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_12u_1[32]) = {
+        0u,  1u,  2u,  3u,  3u,  4u,  5u,  6u,  6u,  7u,  8u,  9u,  9u,  10u, 11u, 12u,
+        12u, 13u, 14u, 15u, 15u, 16u, 17u, 18u, 18u, 19u, 20u, 21u, 21u, 22u, 23u, 24u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_12u_0[16]) = {0u, 8u, 0u, 8u, 0u, 8u, 0u, 8u,
+                                                               0u, 8u, 0u, 8u, 0u, 8u, 0u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_12u_1[16]) = {4u, 12u, 4u, 12u, 4u, 12u, 4u, 12u,
+                                                               4u, 12u, 4u, 12u, 4u, 12u, 4u, 12u};
+
+// ------------------------------------ 13u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_13u_0[32]) = {
+        0u,  1u,  1u,  2u,  3u,  4u,  4u,  5u,  6u,  7u,  8u,  9u,  9u,  10u, 11u, 12u,
+        13u, 14u, 14u, 15u, 16u, 17u, 17u, 18u, 19u, 20u, 21u, 22u, 22u, 23u, 24u, 25u};
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_13u_1[32]) = {
+        0u,  1u,  2u,  3u,  4u,  5u,  5u,  6u,  7u,  8u,  8u,  9u,  10u, 11u, 12u, 13u,
+        13u, 14u, 15u, 16u, 17u, 18u, 18u, 19u, 20u, 21u, 21u, 22u, 23u, 24u, 25u, 26u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_13u_0[16]) = {0u, 10u, 4u, 14u, 8u, 2u, 12u, 6u,
+                                                               0u, 10u, 4u, 14u, 8u, 2u, 12u, 6u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_13u_1[16]) = {3u, 9u, 15u, 5u, 11u, 1u, 7u, 13u,
+                                                               3u, 9u, 15u, 5u, 11u, 1u, 7u, 13u};
+
+// ------------------------------------ 14u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_14u_0[32]) = {
+        0u,  1u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  8u,  9u,  10u, 11u, 12u, 13u,
+        14u, 15u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 22u, 23u, 24u, 25u, 26u, 27u};
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_14u_1[32]) = {
+        0u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  7u,  8u,  9u,  10u, 11u, 12u, 13u, 14u,
+        14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, 28u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_14u_0[16]) = {0u, 12u, 8u, 4u, 0u, 12u, 8u, 4u,
+                                                               0u, 12u, 8u, 4u, 0u, 12u, 8u, 4u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_14u_1[16]) = {2u, 6u, 10u, 14u, 2u, 6u, 10u, 14u,
+                                                               2u, 6u, 10u, 14u, 2u, 6u, 10u, 14u};
+
+// ------------------------------------ 15u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_15u_0[32]) = {
+        0u,  1u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  9u,  10u, 11u, 12u, 13u, 14u,
+        15u, 16u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, 28u, 29u};
+OWN_ALIGNED_64_ARRAY(static uint16_t permutex_idx_table_15u_1[32]) = {
+        0u,  1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  9u,  10u, 11u, 12u, 13u, 14u, 15u,
+        15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u, 28u, 29u, 30u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_15u_0[16]) = {0u, 14u, 12u, 10u, 8u, 6u, 4u, 2u,
+                                                               0u, 14u, 12u, 10u, 8u, 6u, 4u, 2u};
+OWN_ALIGNED_64_ARRAY(static uint32_t shift_table_15u_1[16]) = {1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u,
+                                                               1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
+
+template <typename OutType>
+const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width, OutType* dst_ptr);
+
+template <>
+inline const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint16_t* dst_ptr) {
+    uint32_t mask = OWN_BIT_MASK(bit_width);
+    uint32_t next_word;
+    uint32_t bits_in_buf = OWN_WORD_WIDTH;
+    uint16_t* src16u_ptr = (uint16_t*)src_ptr;
+    uint16_t* dst16u_ptr = (uint16_t*)dst_ptr;
+    uint32_t src = (uint32_t)(*src16u_ptr);
+    src16u_ptr++;
+
+    while (1u < values_to_read) {
+        if (bit_width > bits_in_buf) {
+            next_word = (uint32_t)(*src16u_ptr);
+            src16u_ptr++;
+            next_word = next_word << bits_in_buf;
+            src = src | next_word;
+            bits_in_buf += OWN_WORD_WIDTH;
+        }
+        *dst16u_ptr = (uint16_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst16u_ptr++;
+        values_to_read--;
+    }
+
+    if (bit_width > bits_in_buf) {
+        next_word = (uint32_t)(bit_width - bits_in_buf > 8u ? *src16u_ptr : *((uint8_t*)src16u_ptr));
+        next_word = next_word << bits_in_buf;
+        src = src | next_word;
+    }
+    *dst16u_ptr = (uint16_t)(src & mask);
+    return (uint8_t*)(src16u_ptr);
+}
+
+template <>
+inline const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint32_t* dst_ptr) {
+    printf("=== test wwq unpack_Nu16u ===\n");
+    uint32_t mask = OWN_BIT_MASK(bit_width);
+    uint32_t next_word;
+    uint32_t bits_in_buf = OWN_WORD_WIDTH;
+    uint16_t* src16u_ptr = (uint16_t*)src_ptr;
+    uint32_t* dst32u_ptr = (uint32_t*)dst_ptr;
+    uint32_t src = (uint32_t)(*src16u_ptr);
+    src16u_ptr++;
+
+    while (1u < values_to_read) {
+        if (bit_width > bits_in_buf) {
+            next_word = (uint32_t)(*src16u_ptr);
+            src16u_ptr++;
+            next_word = next_word << bits_in_buf;
+            src = src | next_word;
+            bits_in_buf += OWN_WORD_WIDTH;
+        }
+        *dst32u_ptr = (uint32_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst32u_ptr++;
+        values_to_read--;
+    }
+
+    if (bit_width > bits_in_buf) {
+        next_word = (uint32_t)(bit_width - bits_in_buf > 8u ? *src16u_ptr : *((uint8_t*)src16u_ptr));
+        next_word = next_word << bits_in_buf;
+        src = src | next_word;
+    }
+    *dst32u_ptr = (uint32_t)(src & mask);
+    return (uint8_t*)(src16u_ptr);
+}
+
+template <>
+inline const uint8_t* unpack_Nu16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint64_t* dst_ptr) {
+    // Scalar fallback: unpack values_to_read items of bit_width bits each
+    // (bit_width <= 16) from a 16-bit-word input stream into 64-bit outputs.
+    const uint32_t value_mask = OWN_BIT_MASK(bit_width);
+    const uint16_t* in16_ptr = (const uint16_t*)src_ptr;
+    uint64_t* out_ptr = dst_ptr;
+
+    // Prime the 32-bit bit buffer with the first 16-bit word.
+    uint32_t bit_buf = (uint32_t)(*in16_ptr);
+    in16_ptr++;
+    uint32_t avail_bits = OWN_WORD_WIDTH;
+
+    // Every value but the last: refill the buffer from the stream on demand,
+    // then peel bit_width bits off the bottom of the buffer.
+    for (; 1u < values_to_read; values_to_read--) {
+        if (avail_bits < bit_width) {
+            bit_buf |= ((uint32_t)(*in16_ptr)) << avail_bits;
+            in16_ptr++;
+            avail_bits += OWN_WORD_WIDTH;
+        }
+        *out_ptr = (uint64_t)(bit_buf & value_mask);
+        out_ptr++;
+        bit_buf >>= bit_width;
+        avail_bits -= bit_width;
+    }
+
+    // Final value: top up from a single byte or a whole word as needed, but
+    // do NOT advance the cursor, so the returned pointer reflects exactly the
+    // words fully consumed by the loop above.
+    if (avail_bits < bit_width) {
+        uint32_t tail = (bit_width - avail_bits > 8u) ? (uint32_t)(*in16_ptr)
+                                                      : (uint32_t)(*(const uint8_t*)in16_ptr);
+        bit_buf |= tail << avail_bits;
+    }
+    *out_ptr = (uint64_t)(bit_buf & value_mask);
+    return (const uint8_t*)in16_ptr;
+}
+
+// Unpack 9-bit values packed in a 16-bit-word stream into OutType outputs.
+// Only the explicit specializations below are defined; the primary template
+// is declaration-only, so an unsupported OutType fails to link.
+template <typename OutType>
+const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output cannot hold a 9-bit value; this stub is a no-op that leaves
+// the source pointer unchanged (presumably never dispatched -- confirm at the
+// call site).
+template <>
+inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// AVX-512 path for uint16_t output: unpacks 32 values (9 bits each) per
+// iteration, then hands any remainder (< 32 values) to the scalar
+// unpack_Nu16u fallback.
+template <>
+inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask covering only the input words that hold 32 * 9 packed bits.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(9u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(9u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_9u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_9u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_9u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_9u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 4u * 9u; // 32 values * 9 bits = 36 bytes consumed
+            dst_ptr += 32u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 9u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint32_t output: same unpack as above, but widens the 32
+// uint16_t lanes to uint32_t via extend_16u32u (which presumably also
+// advances dst_ptr -- confirm in extend_16u.h).
+template <>
+inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(9u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(9u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_9u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_9u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_9u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_9u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 9u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 9u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint64_t output: widens to uint64_t via extend_16u64u.
+template <>
+inline const uint8_t* unpack_9u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(9u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(9u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_9u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_9u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_9u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_9u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u64u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 9u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 9u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Unpack 10-bit values packed in a 16-bit-word stream into OutType outputs.
+// Only the explicit specializations below are defined; the primary template
+// is declaration-only, so an unsupported OutType fails to link.
+template <typename OutType>
+const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output cannot hold a 10-bit value; this stub is a no-op that leaves
+// the source pointer unchanged (presumably never dispatched -- confirm at the
+// call site).
+template <>
+inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// AVX-512 path for uint16_t output: unpacks 32 values (10 bits each) per
+// iteration, then hands any remainder (< 32 values) to the scalar
+// unpack_Nu16u fallback.
+template <>
+inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask covering only the input words that hold 32 * 10 packed bits.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(10u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(10u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_10u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_10u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_10u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_10u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 4u * 10u; // 32 values * 10 bits = 40 bytes consumed
+            dst_ptr += 32u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 10u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint32_t output: same unpack as above, but widens the 32
+// uint16_t lanes to uint32_t via extend_16u32u (which presumably also
+// advances dst_ptr -- confirm in extend_16u.h).
+template <>
+inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(10u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(10u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_10u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_10u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_10u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_10u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 10u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 10u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint64_t output: widens to uint64_t via extend_16u64u.
+template <>
+inline const uint8_t* unpack_10u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(10u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(10u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_10u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_10u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_10u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_10u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u64u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 10u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 10u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Unpack 11-bit values packed in a 16-bit-word stream into OutType outputs.
+// Only the explicit specializations below are defined; the primary template
+// is declaration-only, so an unsupported OutType fails to link.
+template <typename OutType>
+const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output cannot hold an 11-bit value; this stub is a no-op that
+// leaves the source pointer unchanged (presumably never dispatched --
+// confirm at the call site).
+template <>
+inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// AVX-512 path for uint16_t output: unpacks 32 values (11 bits each) per
+// iteration, then hands any remainder (< 32 values) to the scalar
+// unpack_Nu16u fallback.
+template <>
+inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask covering only the input words that hold 32 * 11 packed bits.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(11u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(11u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_11u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_11u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_11u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_11u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 4u * 11u; // 32 values * 11 bits = 44 bytes consumed
+            dst_ptr += 32u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 11u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint32_t output: same unpack as above, but widens the 32
+// uint16_t lanes to uint32_t via extend_16u32u (which presumably also
+// advances dst_ptr -- confirm in extend_16u.h).
+template <>
+inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(11u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(11u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_11u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_11u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_11u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_11u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 11u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 11u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint64_t output: widens to uint64_t via extend_16u64u.
+template <>
+inline const uint8_t* unpack_11u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(11u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(11u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_11u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_11u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_11u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_11u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u64u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 11u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 11u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Unpack 12-bit values packed in a 16-bit-word stream into OutType outputs.
+// Only the explicit specializations below are defined; the primary template
+// is declaration-only, so an unsupported OutType fails to link.
+template <typename OutType>
+const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output cannot hold a 12-bit value; this stub is a no-op that leaves
+// the source pointer unchanged (presumably never dispatched -- confirm at the
+// call site).
+template <>
+inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// AVX-512 path for uint16_t output: unpacks 32 values (12 bits each) per
+// iteration, then hands any remainder (< 32 values) to the scalar
+// unpack_Nu16u fallback.
+template <>
+inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask covering only the input words that hold 32 * 12 packed bits.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(12u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(12u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_12u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_12u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_12u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_12u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 4u * 12u; // 32 values * 12 bits = 48 bytes consumed
+            dst_ptr += 32u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 12u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint32_t output: same unpack as above, but widens the 32
+// uint16_t lanes to uint32_t via extend_16u32u (which presumably also
+// advances dst_ptr -- confirm in extend_16u.h).
+template <>
+inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(12u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(12u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_12u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_12u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_12u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_12u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 12u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 12u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint64_t output: widens to uint64_t via extend_16u64u.
+template <>
+inline const uint8_t* unpack_12u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(12u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(12u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_12u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_12u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_12u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_12u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u64u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 12u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 12u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Unpack 13-bit values packed in a 16-bit-word stream into OutType outputs.
+// Only the explicit specializations below are defined; the primary template
+// is declaration-only, so an unsupported OutType fails to link.
+template <typename OutType>
+const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output cannot hold a 13-bit value; this stub is a no-op that leaves
+// the source pointer unchanged (presumably never dispatched -- confirm at the
+// call site).
+template <>
+inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// AVX-512 path for uint16_t output: unpacks 32 values (13 bits each) per
+// iteration, then hands any remainder (< 32 values) to the scalar
+// unpack_Nu16u fallback.
+template <>
+inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask covering only the input words that hold 32 * 13 packed bits.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(13u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(13u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_13u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_13u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_13u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_13u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 4u * 13u; // 32 values * 13 bits = 52 bytes consumed
+            dst_ptr += 32u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 13u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint32_t output: unpacks 32 values (13 bits each) per
+// iteration, widening the uint16_t lanes to uint32_t via extend_16u32u, then
+// hands any remainder (< 32 values) to the scalar unpack_Nu16u fallback.
+// NOTE(review): removed a leftover debug printf ("=== test wwq ... ===") that
+// would have written to stdout on every call of this hot decode path.
+template <>
+inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask covering only the input words that hold 32 * 13 packed bits.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(13u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(13u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_13u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_13u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_13u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_13u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 13u; // 32 values * 13 bits = 52 bytes consumed
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 13u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint64_t output: unpacks 32 values (13 bits each) per
+// iteration, widening to uint64_t via extend_16u64u (which presumably also
+// advances dst_ptr -- confirm in extend_16u.h), then hands any remainder
+// (< 32 values) to the scalar unpack_Nu16u fallback.
+template <>
+inline const uint8_t* unpack_13u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(13u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(13u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_13u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_13u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_13u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_13u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u64u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 13u; // 32 values * 13 bits = 52 bytes consumed
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 13u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Unpack 14-bit values packed in a 16-bit-word stream into OutType outputs.
+// Only the explicit specializations below are defined; the primary template
+// is declaration-only, so an unsupported OutType fails to link.
+template <typename OutType>
+const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output cannot hold a 14-bit value; this stub is a no-op that leaves
+// the source pointer unchanged (presumably never dispatched -- confirm at the
+// call site).
+template <>
+inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// AVX-512 path for uint16_t output: unpacks 32 values (14 bits each) per
+// iteration, then hands any remainder (< 32 values) to the scalar
+// unpack_Nu16u fallback.
+template <>
+inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask covering only the input words that hold 32 * 14 packed bits.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(14u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(14u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_14u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_14u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_14u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_14u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 4u * 14u; // 32 values * 14 bits = 56 bytes consumed
+            dst_ptr += 32u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 14u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint32_t output: same unpack as above, but widens the 32
+// uint16_t lanes to uint32_t via extend_16u32u (which presumably also
+// advances dst_ptr -- confirm in extend_16u.h).
+template <>
+inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(14u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(14u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_14u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_14u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_14u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_14u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 14u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 14u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// AVX-512 path for uint64_t output: widens to uint64_t via extend_16u64u.
+template <>
+inline const uint8_t* unpack_14u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(14u * OWN_DWORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(14u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_14u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_14u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_14u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_14u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_16u64u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 14u;
+            values_to_read -= 32u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 14u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Primary template for unpacking 15-bit-packed values; only the explicit
+// specializations below (per output width) are defined.
+template <typename OutType>
+const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output: a 15-bit value cannot fit in one byte, so this
+// specialization is a deliberate no-op stub.
+// NOTE(review): it silently returns src_ptr without unpacking anything --
+// consider an assert/unreachable marker so a bad dispatch fails loudly.
+// TODO confirm this combination is never selected by the caller.
+template <>
+inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// Unpacks 15-bit-packed values into uint16_t outputs. Processes 32 values
+// (32 * 15 bits = 60 bytes of input) per AVX-512 iteration, storing the 32
+// results directly with a 512-bit store; any remainder (< 32 values) falls
+// through to the generic scalar path unpack_Nu16u. Returns the input
+// pointer advanced past the consumed bytes.
+template <>
+inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask: 15 * 32 bits = 480 bits = 30 sixteen-bit words = 60 bytes.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(15u * OWN_DWORD_WIDTH));
+        // Keeps only the low 15 bits of each 16-bit lane.
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(15u));
+
+        // Permutation tables for even- (index 0) and odd- (index 1) indexed
+        // output elements.
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_15u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_15u_1);
+
+        // Matching per-lane shift amounts.
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_15u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_15u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            // Masked load of exactly the 60 bytes consumed this iteration.
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            // (0xAAAAAAAA selects the odd 16-bit lanes from zmm[1])
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Output is already 16-bit wide: store all 32 results at once.
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 4u * 15u;
+            dst_ptr += 32u;
+            values_to_read -= 32u;
+        }
+    }
+
+    // Scalar fallback for the trailing < 32 values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 15u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Unpacks 15-bit-packed values and widens each to uint32_t. Same extraction
+// as the uint16_t specialization above, but the final store is replaced by
+// a widening step (extend_16u32u). Processes 32 values (60 bytes) per
+// AVX-512 iteration, then hands any remainder to the scalar unpack_Nu16u.
+// NOTE(review): dst_ptr is not advanced inside the vector loop, so
+// extend_16u32u presumably advances the destination pointer by reference --
+// confirm against extend_16u.h.
+template <>
+inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask: 15 * 32 bits = 30 sixteen-bit words = 60 bytes per pass.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(15u * OWN_DWORD_WIDTH));
+        // Keeps only the low 15 bits of each 16-bit lane.
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(15u));
+
+        // Permutation tables for even- (index 0) and odd- (index 1) indexed
+        // output elements.
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_15u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_15u_1);
+
+        // Matching per-lane shift amounts.
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_15u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_15u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            // Masked load of exactly the 60 bytes consumed this iteration.
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            // (0xAAAAAAAA selects the odd 16-bit lanes from zmm[1])
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen the 32 unpacked 16-bit values to uint32_t and store.
+            extend_16u32u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 15u;
+            values_to_read -= 32u;
+        }
+    }
+
+    // Scalar fallback for the trailing < 32 values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 15u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Unpacks 15-bit-packed values and widens each to uint64_t. Same extraction
+// as the specializations above; only the widening step (extend_16u64u)
+// differs. Processes 32 values (60 bytes) per AVX-512 iteration, then hands
+// any remainder to the scalar unpack_Nu16u.
+// NOTE(review): dst_ptr is not advanced inside the vector loop, so
+// extend_16u64u presumably advances the destination pointer by reference --
+// confirm against extend_16u.h.
+template <>
+inline const uint8_t* unpack_15u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 32u) {
+        // Load mask: 15 * 32 bits = 30 sixteen-bit words = 60 bytes per pass.
+        __mmask32 read_mask = OWN_BIT_MASK(OWN_BITS_2_WORD(15u * OWN_DWORD_WIDTH));
+        // Keeps only the low 15 bits of each 16-bit lane.
+        __m512i parse_mask0 = _mm512_set1_epi16(OWN_BIT_MASK(15u));
+
+        // Permutation tables for even- (index 0) and odd- (index 1) indexed
+        // output elements.
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_15u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_15u_1);
+
+        // Matching per-lane shift amounts.
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_15u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_15u_1);
+
+        while (values_to_read >= 32u) {
+            __m512i srcmm, zmm[2];
+
+            // Masked load of exactly the 60 bytes consumed this iteration.
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi16(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi16(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi32(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi32(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            // (0xAAAAAAAA selects the odd 16-bit lanes from zmm[1])
+            zmm[0] = _mm512_mask_mov_epi16(zmm[0], 0xAAAAAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen the 32 unpacked 16-bit values to uint64_t and store.
+            extend_16u64u(zmm[0], dst_ptr);
+
+            src_ptr += 4u * 15u;
+            values_to_read -= 32u;
+        }
+    }
+
+    // Scalar fallback for the trailing < 32 values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu16u(src_ptr, values_to_read, 15u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// Primary template for "unpacking" 16-bit values (already byte-aligned);
+// only the explicit specializations below (per output width) are defined.
+template <typename OutType>
+const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// uint8_t output: a 16-bit value cannot fit in one byte, so this
+// specialization is a deliberate no-op stub.
+// NOTE(review): it silently returns src_ptr without unpacking anything --
+// consider an assert/unreachable marker so a bad dispatch fails loudly.
+// TODO confirm this combination is never selected by the caller.
+template <>
+inline const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_16u16u(const uint8_t* src_ptr, uint32_t values_to_read, uint16_t* dst_ptr) {
+    memcpy(dst_ptr, src_ptr, values_to_read * 2);

Review Comment:
   warning: use of undeclared identifier 'memcpy' [clang-diagnostic-error]
   ```cpp
       memcpy(dst_ptr, src_ptr, values_to_read * 2);
       ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
+#define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
+#define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */
+#define OWN_WORD_WIDTH 16u                                 /**< Word width in bits */
+#define OWN_3_BYTE_WIDTH 24u                               /**< 3-byte width in bits */
+#define OWN_DWORD_WIDTH 32u                                /**< Dword width in bits */
+#define OWN_6_BYTE_WIDTH 48u                               /**< 6-byte width in bits */
+#define OWN_7_BYTE_WIDTH 56u                               /**< 7-byte width in bits */
+#define OWN_QWORD_WIDTH 64u                                /**< Qword width in bits */
+#define OWN_RLE_BURST_MAX_COUNT 65535u                     /**< Maximum count for 32u rle_burst operation */
+#define OWN_BIT_MASK(x) (((1ULL) << (x)) - 1u)             /**< Bit mask below bit position */
+#define OWN_PARQUET_WIDTH 8u                               /**< Parquet size in elements (PRLE format) */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_PARQUET_WIDTH 8u                               /**< Parquet size in elements (PRLE format) */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
+#define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */
+#define OWN_WORD_WIDTH 16u                                 /**< Word width in bits */
+#define OWN_3_BYTE_WIDTH 24u                               /**< 3-byte width in bits */
+#define OWN_DWORD_WIDTH 32u                                /**< Dword width in bits */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_DWORD_WIDTH 32u                                /**< Dword width in bits */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
+#define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */
+#define OWN_WORD_WIDTH 16u                                 /**< Word width in bits */
+#define OWN_3_BYTE_WIDTH 24u                               /**< 3-byte width in bits */
+#define OWN_DWORD_WIDTH 32u                                /**< Dword width in bits */
+#define OWN_6_BYTE_WIDTH 48u                               /**< 6-byte width in bits */
+#define OWN_7_BYTE_WIDTH 56u                               /**< 7-byte width in bits */
+#define OWN_QWORD_WIDTH 64u                                /**< Qword width in bits */
+#define OWN_RLE_BURST_MAX_COUNT 65535u                     /**< Maximum count for 32u rle_burst operation */
+#define OWN_BIT_MASK(x) (((1ULL) << (x)) - 1u)             /**< Bit mask below bit position */
+#define OWN_PARQUET_WIDTH 8u                               /**< Parquet size in elements (PRLE format) */
+#define OWN_LITERAL_OCTA_GROUP 1u                          /**< PRLE format description */
+#define OWN_VARINT_BYTE_1(x) (((x)&OWN_7_BIT_MASK) << 6u)  /**< 1st byte extraction for varint format */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_VARINT_BYTE_1(x) (((x)&OWN_7_BIT_MASK) << 6u)  /**< 1st byte extraction for varint format */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_8u.h:
##########
@@ -0,0 +1,3349 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "extend_8u.h"
+#include "unpack_def.h"
+
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_3u[32]) = {
+        0x0,      0x10020,  0x210021, 0x220002, 0x30003,  0x40023,  0x240024, 0x250005, 0x60006,  0x70026,  0x270027,
+        0x280008, 0x90009,  0xA0029,  0x2A002A, 0x2B000B, 0x200000, 0x10020,  0x20021,  0x220002, 0x230003, 0x40023,
+        0x50024,  0x250005, 0x260006, 0x70026,  0x80027,  0x280008, 0x290009, 0xA0029,  0xB002A,  0x2B000B};
+
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_5u[32]) = {
+        0x200000, 0x210001, 0x30022,  0x40023,  0x250005, 0x260006, 0x80027,  0x90028,  0x2A000A, 0x2B000B, 0xD002C,
+        0xE002D,  0x2F000F, 0x300010, 0x120031, 0x130032, 0x200000, 0x20021,  0x30022,  0x240004, 0x250005, 0x70026,
+        0x80027,  0x290009, 0x2A000A, 0xC002B,  0xD002C,  0x2E000E, 0x2F000F, 0x110030, 0x120031, 0x330013};
+
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_6u[32]) = {
+        0x200000, 0x20021,  0x230003, 0x50024,  0x260006, 0x80027,  0x290009, 0xB002A,  0x2C000C, 0xE002D, 0x2F000F,
+        0x110030, 0x320012, 0x140033, 0x350015, 0x170036, 0x10000,  0x220021, 0x40003,  0x250024, 0x70006, 0x280027,
+        0xA0009,  0x2B002A, 0xD000C,  0x2E002D, 0x10000F, 0x310030, 0x130012, 0x340033, 0x160015, 0x370036};
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_7u[32]) = {
+        0x200000, 0x220021, 0x40023,  0x60005,  0x270007, 0x290028, 0xB002A,  0xD000C,  0x2E000E, 0x30002F, 0x120031,
+        0x140013, 0x350015, 0x370036, 0x190038, 0x1B001A, 0x10000,  0x30002,  0x240023, 0x260025, 0x80007,  0xA0009,
+        0x2B002A, 0x2D002C, 0xF000E,  0x110010, 0x320031, 0x340033, 0x160015, 0x180017, 0x390038, 0x3B003A};
+
+// ------------------------------------ 3u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_3u_0[32]) = {0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u,
+                                                              2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u,
+                                                              4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_3u_1[32]) = {5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u,
+                                                              3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u,
+                                                              1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u};
+
+// ------------------------------------ 5u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_5u_0[32]) = {0u, 2u, 4u, 6u, 0u, 2u, 4u, 6u, 0u, 2u, 4u,
+                                                              6u, 0u, 2u, 4u, 6u, 0u, 2u, 4u, 6u, 0u, 2u,
+                                                              4u, 6u, 0u, 2u, 4u, 6u, 0u, 2u, 4u, 6u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_5u_1[32]) = {3u, 1u, 7u, 5u, 3u, 1u, 7u, 5u, 3u, 1u, 7u,
+                                                              5u, 3u, 1u, 7u, 5u, 3u, 1u, 7u, 5u, 3u, 1u,
+                                                              7u, 5u, 3u, 1u, 7u, 5u, 3u, 1u, 7u, 5u};
+
+// ------------------------------------ 6u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_6u_0[32]) = {0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u,
+                                                              4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u,
+                                                              0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_6u_1[32]) = {2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u,
+                                                              6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u,
+                                                              2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u};
+
+// ------------------------------------ 7u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_7u_0[32]) = {0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u,
+                                                              2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u,
+                                                              4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_7u_1[32]) = {1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u,
+                                                              7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u,
+                                                              5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u};
+
+// ********************** 0u ****************************** //
+template <typename OutType>
+const uint8_t* unpack_0u8u(const uint8_t* src_ptr, int64_t values_to_read, OutType* dst_ptr);
+template <>
+inline const uint8_t* unpack_0u8u(const uint8_t* src_ptr, int64_t values_to_read, uint8_t* dst_ptr) {

Review Comment:
   warning: pointer parameter 'dst_ptr' can be pointer to const [readability-non-const-parameter]
   
   be/src/util/bitpacking/unpack_8u.h:70:
   ```diff
   - const uint8_t* unpack_0u8u(const uint8_t* src_ptr, int64_t values_to_read, OutType* dst_ptr);
   + const uint8_t* unpack_0u8u(const uint8_t* src_ptr, int64_t values_to_read, const OutType* dst_ptr);
   ```
   
   ```suggestion
   inline const uint8_t* unpack_0u8u(const uint8_t* src_ptr, int64_t values_to_read, const uint8_t* dst_ptr) {
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_8u.h:
##########
@@ -0,0 +1,3349 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "extend_8u.h"
+#include "unpack_def.h"
+
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_3u[32]) = {
+        0x0,      0x10020,  0x210021, 0x220002, 0x30003,  0x40023,  0x240024, 0x250005, 0x60006,  0x70026,  0x270027,
+        0x280008, 0x90009,  0xA0029,  0x2A002A, 0x2B000B, 0x200000, 0x10020,  0x20021,  0x220002, 0x230003, 0x40023,
+        0x50024,  0x250005, 0x260006, 0x70026,  0x80027,  0x280008, 0x290009, 0xA0029,  0xB002A,  0x2B000B};
+
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_5u[32]) = {
+        0x200000, 0x210001, 0x30022,  0x40023,  0x250005, 0x260006, 0x80027,  0x90028,  0x2A000A, 0x2B000B, 0xD002C,
+        0xE002D,  0x2F000F, 0x300010, 0x120031, 0x130032, 0x200000, 0x20021,  0x30022,  0x240004, 0x250005, 0x70026,
+        0x80027,  0x290009, 0x2A000A, 0xC002B,  0xD002C,  0x2E000E, 0x2F000F, 0x110030, 0x120031, 0x330013};
+
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_6u[32]) = {
+        0x200000, 0x20021,  0x230003, 0x50024,  0x260006, 0x80027,  0x290009, 0xB002A,  0x2C000C, 0xE002D, 0x2F000F,
+        0x110030, 0x320012, 0x140033, 0x350015, 0x170036, 0x10000,  0x220021, 0x40003,  0x250024, 0x70006, 0x280027,
+        0xA0009,  0x2B002A, 0xD000C,  0x2E002D, 0x10000F, 0x310030, 0x130012, 0x340033, 0x160015, 0x370036};
+OWN_ALIGNED_64_ARRAY(const uint32_t p_permutex_masks_7u[32]) = {
+        0x200000, 0x220021, 0x40023,  0x60005,  0x270007, 0x290028, 0xB002A,  0xD000C,  0x2E000E, 0x30002F, 0x120031,
+        0x140013, 0x350015, 0x370036, 0x190038, 0x1B001A, 0x10000,  0x30002,  0x240023, 0x260025, 0x80007,  0xA0009,
+        0x2B002A, 0x2D002C, 0xF000E,  0x110010, 0x320031, 0x340033, 0x160015, 0x180017, 0x390038, 0x3B003A};
+
+// ------------------------------------ 3u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_3u_0[32]) = {0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u,
+                                                              2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u,
+                                                              4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_3u_1[32]) = {5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u,
+                                                              3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u,
+                                                              1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u};
+
+// ------------------------------------ 5u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_5u_0[32]) = {0u, 2u, 4u, 6u, 0u, 2u, 4u, 6u, 0u, 2u, 4u,
+                                                              6u, 0u, 2u, 4u, 6u, 0u, 2u, 4u, 6u, 0u, 2u,
+                                                              4u, 6u, 0u, 2u, 4u, 6u, 0u, 2u, 4u, 6u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_5u_1[32]) = {3u, 1u, 7u, 5u, 3u, 1u, 7u, 5u, 3u, 1u, 7u,
+                                                              5u, 3u, 1u, 7u, 5u, 3u, 1u, 7u, 5u, 3u, 1u,
+                                                              7u, 5u, 3u, 1u, 7u, 5u, 3u, 1u, 7u, 5u};
+
+// ------------------------------------ 6u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_6u_0[32]) = {0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u,
+                                                              4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u,
+                                                              0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u, 0u, 4u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_6u_1[32]) = {2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u,
+                                                              6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u,
+                                                              2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u, 2u, 6u};
+
+// ------------------------------------ 7u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_7u_0[32]) = {0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u,
+                                                              2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u,
+                                                              4u, 2u, 0u, 6u, 4u, 2u, 0u, 6u, 4u, 2u};
+OWN_ALIGNED_64_ARRAY(static uint16_t shift_table_7u_1[32]) = {1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u,
+                                                              7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u,
+                                                              5u, 7u, 1u, 3u, 5u, 7u, 1u, 3u, 5u, 7u};
+
+// ********************** 0u ****************************** //
+template <typename OutType>
+const uint8_t* unpack_0u8u(const uint8_t* src_ptr, int64_t values_to_read, OutType* dst_ptr);
+template <>
+inline const uint8_t* unpack_0u8u(const uint8_t* src_ptr, int64_t values_to_read, uint8_t* dst_ptr) {
+    memset(dst_ptr, 0, values_to_read);

Review Comment:
   warning: use of undeclared identifier 'memset' [clang-diagnostic-error]
   ```cpp
       memset(dst_ptr, 0, values_to_read);
       ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
+#define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */
+#define OWN_WORD_WIDTH 16u                                 /**< Word width in bits */
+#define OWN_3_BYTE_WIDTH 24u                               /**< 3-byte width in bits */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_3_BYTE_WIDTH 24u                               /**< 3-byte width in bits */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_32u.h:
##########
@@ -0,0 +1,1748 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "extend_32u.h"
+#include "unpack_def.h"
+
+// ------------------------------------ 17u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_17u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 6u, 7u, 7u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_17u_1[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 6u, 7u, 7u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_17u_0[8]) = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_17u_1[8]) = {15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
+
+// ------------------------------------ 18u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_18u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 6u, 7u, 7u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_18u_1[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_18u_0[8]) = {0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_18u_1[8]) = {14u, 10u, 6u, 2u, 30u, 26u, 22u, 18u};
+
+// ------------------------------------ 19u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_19u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_19u_1[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 4u, 5u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_19u_0[8]) = {0u, 6u, 12u, 18u, 24u, 30u, 4u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_19u_1[8]) = {13u, 7u, 1u, 27u, 21u, 15u, 9u, 3u};
+
+// ------------------------------------ 20u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_20u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_20u_1[16]) = {0u, 1u, 1u, 2u, 3u, 4u, 4u, 5u,
+                                                                      5u, 6u, 6u, 7u, 8u, 9u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_20u_0[8]) = {0u, 8u, 16u, 24u, 0u, 8u, 16u, 24u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_20u_1[8]) = {12u, 4u, 28u, 20u, 12u, 4u, 28u, 20u};
+
+// ------------------------------------ 21u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_21u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_21u_1[16]) = {0u, 1u, 1u, 2u, 3u, 4u, 4u, 5u,
+                                                                      5u, 6u, 7u, 8u, 8u, 9u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_21u_0[8]) = {0u, 10u, 20u, 30u, 8u, 18u, 28u, 6u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_21u_1[8]) = {11u, 1u, 23u, 13u, 3u, 25u, 15u, 5u};
+
+// ------------------------------------ 22u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_22u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 4u, 5u,
+                                                                      5u, 6u, 6u, 7u, 8u, 9u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_22u_1[16]) = {0u, 1u, 2u, 3u, 3u, 4u, 4u,  5u,
+                                                                      6u, 7u, 7u, 8u, 8u, 9u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_22u_0[8]) = {0u, 12u, 24u, 4u, 16u, 28u, 8u, 20u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_22u_1[8]) = {10u, 30u, 18u, 6u, 26u, 14u, 2u, 22u};
+
+// ------------------------------------ 23u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_23u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 4u,  5u,
+                                                                      5u, 6u, 7u, 8u, 8u, 9u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_23u_1[16]) = {0u, 1u, 2u, 3u, 3u, 4u,  5u,  6u,
+                                                                      6u, 7u, 7u, 8u, 9u, 10u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_23u_0[8]) = {0u, 14u, 28u, 10u, 24u, 6u, 20u, 2u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_23u_1[8]) = {9u, 27u, 13u, 31u, 17u, 3u, 21u, 7u};
+
+// ------------------------------------ 24u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_24u_0[16]) = {0u, 1u, 1u, 2u, 3u, 4u,  4u,  5u,
+                                                                      6u, 7u, 7u, 8u, 9u, 10u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_24u_1[16]) = {0u, 1u, 2u, 3u, 3u, 4u,  5u,  6u,
+                                                                      6u, 7u, 8u, 9u, 9u, 10u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_24u_0[8]) = {0u, 16u, 0u, 16u, 0u, 16u, 0u, 16u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_24u_1[8]) = {8u, 24u, 8u, 24u, 8u, 24u, 8u, 24u};
+
+// ------------------------------------ 25u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_25u_0[16]) = {0u, 1u, 1u, 2u, 3u, 4u,  4u,  5u,
+                                                                      6u, 7u, 7u, 8u, 9u, 10u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_25u_1[16]) = {0u, 1u, 2u, 3u, 3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 8u, 9u, 10u, 11u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_25u_0[8]) = {0u, 18u, 4u, 22u, 8u, 26u, 12u, 30u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_25u_1[8]) = {7u, 21u, 3u, 17u, 31u, 13u, 27u, 9u};
+
+// ------------------------------------ 26u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_26u_0[16]) = {0u, 1u, 1u, 2u, 3u, 4u,  4u,  5u,
+                                                                      6u, 7u, 8u, 9u, 9u, 10u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_26u_1[16]) = {0u, 1u, 2u, 3u, 4u,  5u,  5u,  6u,
+                                                                      7u, 8u, 8u, 9u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_26u_0[8]) = {0u, 20u, 8u, 28u, 16u, 4u, 24u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_26u_1[8]) = {6u, 18u, 30u, 10u, 22u, 2u, 14u, 26u};
+
+// ------------------------------------ 27u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_27u_0[16]) = {0u, 1u, 1u, 2u, 3u,  4u,  5u,  6u,
+                                                                      6u, 7u, 8u, 9u, 10u, 11u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_27u_1[16]) = {0u, 1u, 2u, 3u,  4u,  5u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_27u_0[8]) = {0u, 22u, 12u, 2u, 24u, 14u, 4u, 26u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_27u_1[8]) = {5u, 15u, 25u, 3u, 13u, 23u, 1u, 11u};
+
+// ------------------------------------ 28u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_28u_0[16]) = {0u, 1u, 1u, 2u, 3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 8u, 9u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_28u_1[16]) = {0u, 1u, 2u, 3u,  4u,  5u,  6u,  7u,
+                                                                      7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_28u_0[8]) = {0u, 24u, 16u, 8u, 0u, 24u, 16u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_28u_1[8]) = {4u, 12u, 20u, 28u, 4u, 12u, 20u, 28u};
+
+// ------------------------------------ 29u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_29u_0[16]) = {0u, 1u, 1u, 2u,  3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_29u_1[16]) = {0u, 1u, 2u, 3u,  4u,  5u,  6u,  7u,
+                                                                      8u, 9u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_29u_0[8]) = {0u, 26u, 20u, 14u, 8u, 2u, 28u, 22u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_29u_1[8]) = {3u, 9u, 15u, 21u, 27u, 1u, 7u, 13u};
+
+// ------------------------------------ 30u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_30u_0[16]) = {0u, 1u, 1u, 2u,  3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_30u_1[16]) = {0u, 1u, 2u,  3u,  4u,  5u,  6u,  7u,
+                                                                      8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_30u_0[8]) = {0u, 28u, 24u, 20u, 16u, 12u, 8u, 4u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_30u_1[8]) = {2u, 6u, 10u, 14u, 18u, 22u, 26u, 30u};
+
+// ------------------------------------ 31u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_31u_0[16]) = {0u, 1u, 1u, 2u,  3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_31u_1[16]) = {0u, 1u, 2u,  3u,  4u,  5u,  6u,  7u,
+                                                                      8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_31u_0[8]) = {0u, 30u, 28u, 26u, 24u, 22u, 20u, 18u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_31u_1[8]) = {1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
+
+template <typename OutType>
+const uint8_t* unpack_Nu32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width, OutType* dst_ptr);
+
+template <>
+inline const uint8_t* unpack_Nu32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint32_t* dst_ptr) {
+    uint64_t mask = OWN_BIT_MASK(bit_width);
+    uint64_t next_dword;
+    uint32_t* src32u_ptr = (uint32_t*)src_ptr;
+    uint8_t* src8u_ptr = (uint8_t*)src_ptr;
+    uint32_t* dst32u_ptr = (uint32_t*)dst_ptr;
+    uint32_t bits_in_buf = 0u;
+    uint64_t src = 0u;
+
+    if (2u < values_to_read) {
+        bits_in_buf = OWN_DWORD_WIDTH;
+        src = (uint64_t)(*src32u_ptr);
+
+        src32u_ptr++;
+
+        while (2u < values_to_read) {
+            if (bit_width > bits_in_buf) {
+                next_dword = (uint64_t)(*src32u_ptr);
+                src32u_ptr++;
+                next_dword = next_dword << bits_in_buf;
+                src = src | next_dword;
+                bits_in_buf += OWN_DWORD_WIDTH;
+            }
+            *dst32u_ptr = (uint32_t)(src & mask);
+            src = src >> bit_width;
+            bits_in_buf -= bit_width;
+            dst32u_ptr++;
+            values_to_read--;
+        }
+
+        src8u_ptr = (uint8_t*)src32u_ptr;
+    }
+
+    while (0u < values_to_read) {
+        while (bit_width > bits_in_buf) {
+            next_dword = (uint64_t)(*src8u_ptr);
+            src8u_ptr++;
+            next_dword = next_dword << bits_in_buf;
+            src = src | next_dword;
+            bits_in_buf += OWN_BYTE_WIDTH;
+        }
+        *dst32u_ptr = (uint32_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst32u_ptr++;
+        values_to_read--;
+    }
+    return src8u_ptr;
+}
+
// Scalar fallback: unpacks `values_to_read` values of `bit_width` bits each
// (bit_width <= 32) from the packed little-endian stream at `src_ptr`,
// zero-extending every value into a uint64_t at `dst_ptr`.  The SIMD kernels
// below call this to finish the final < 16 values of a run.
//
// `src` is a 64-bit shift buffer and `bits_in_buf` counts the valid low bits
// it currently holds.  Returns the input position just past the last byte
// that was loaded from the stream (bits still buffered in `src` when the
// function returns are discarded).
template <>
inline const uint8_t* unpack_Nu32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
                                   uint64_t* dst_ptr) {
    uint64_t mask = OWN_BIT_MASK(bit_width); // keeps the low bit_width bits of the buffer
    uint64_t next_dword;
    uint32_t* src32u_ptr = (uint32_t*)src_ptr;
    uint8_t* src8u_ptr = (uint8_t*)src_ptr;
    uint64_t* dst64u_ptr = (uint64_t*)dst_ptr;
    uint32_t bits_in_buf = 0u;
    uint64_t src = 0u;

    // Bulk phase: while more than two values remain, it is safe to refill the
    // buffer a whole dword at a time.
    if (2u < values_to_read) {
        bits_in_buf = OWN_DWORD_WIDTH;
        src = (uint64_t)(*src32u_ptr);

        src32u_ptr++;

        while (2u < values_to_read) {
            // At most one extra dword is ever needed: before a refill the
            // buffer holds < bit_width (<= 32) bits, so after the refill it
            // holds < 64 bits and at least bit_width of them.
            if (bit_width > bits_in_buf) {
                next_dword = (uint64_t)(*src32u_ptr);
                src32u_ptr++;
                next_dword = next_dword << bits_in_buf;
                src = src | next_dword;
                bits_in_buf += OWN_DWORD_WIDTH;
            }
            *dst32u_ptr = (uint32_t)(src & mask);
            src = src >> bit_width;
            bits_in_buf -= bit_width;
            dst32u_ptr++;
            values_to_read--;
        }

        // Continue byte-wise from where the dword reads stopped.
        src8u_ptr = (uint8_t*)src32u_ptr;
    }

    // Tail phase: refill one byte at a time so the last couple of values do
    // not force reads past the bytes that actually contain them.
    while (0u < values_to_read) {
        while (bit_width > bits_in_buf) {
            next_dword = (uint64_t)(*src8u_ptr);
            src8u_ptr++;
            next_dword = next_dword << bits_in_buf;
            src = src | next_dword;
            bits_in_buf += OWN_BYTE_WIDTH;
        }
        *dst64u_ptr = (uint64_t)(src & mask);
        src = src >> bit_width;
        bits_in_buf -= bit_width;
        dst64u_ptr++;
        values_to_read--;
    }
    return src8u_ptr;
}
+
// ********************** 17u ****************************** //
// Dispatcher declaration: unpack 17-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 17-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 17-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 17 bits = 34 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 17 16-bit words (34 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(17u);
        // Per-lane mask that keeps the low 17 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(17u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_17u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_17u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_17u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_17u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 17u; // 16 values * 17 bits = 34 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 17u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 17-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 34 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 17 16-bit words (34 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(17u);
        // Per-lane mask that keeps the low 17 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(17u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_17u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_17u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_17u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_17u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 17u; // 16 values * 17 bits = 34 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 17u, dst_ptr);
    }
    return src_ptr;
}
// ********************** 18u ****************************** //

// Dispatcher declaration: unpack 18-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold an 18-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 18-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 18 bits = 36 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 9 dwords (36 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(18u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 18 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(18u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_18u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_18u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_18u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_18u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 18u; // 16 values * 18 bits = 36 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 18u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 18-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 36 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 9 dwords (36 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(18u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 18 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(18u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_18u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_18u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_18u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_18u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 18u; // 16 values * 18 bits = 36 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 18u, dst_ptr);
    }
    return src_ptr;
}
+
// ********************** 19u ****************************** //
// Dispatcher declaration: unpack 19-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 19-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 19-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 19 bits = 38 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 19 16-bit words (38 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(19u);
        // Per-lane mask that keeps the low 19 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(19u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_19u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_19u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_19u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_19u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 19u; // 16 values * 19 bits = 38 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 19u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 19-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 38 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 19 16-bit words (38 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(19u);
        // Per-lane mask that keeps the low 19 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(19u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_19u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_19u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_19u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_19u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 19u; // 16 values * 19 bits = 38 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 19u, dst_ptr);
    }
    return src_ptr;
}
+
// ********************** 20u ****************************** //
// Dispatcher declaration: unpack 20-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 20-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 20-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 20 bits = 40 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 10 dwords (40 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(20u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 20 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(20u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_20u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_20u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_20u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_20u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 20u; // 16 values * 20 bits = 40 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 20u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 20-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 40 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 10 dwords (40 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(20u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 20 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(20u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_20u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_20u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_20u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_20u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 20u; // 16 values * 20 bits = 40 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 20u, dst_ptr);
    }
    return src_ptr;
}
+
// ********************** 21u ****************************** //
// Dispatcher declaration: unpack 21-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 21-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 21-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 21 bits = 42 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 21 16-bit words (42 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(21u);
        // Per-lane mask that keeps the low 21 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(21u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_21u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_21u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_21u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_21u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 21u; // 16 values * 21 bits = 42 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 21u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 21-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 42 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 21 16-bit words (42 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(21u);
        // Per-lane mask that keeps the low 21 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(21u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_21u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_21u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_21u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_21u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 21u; // 16 values * 21 bits = 42 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 21u, dst_ptr);
    }
    return src_ptr;
}
+
// ********************** 22u ****************************** //
// Dispatcher declaration: unpack 22-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 22-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 22-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 22 bits = 44 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 11 dwords (44 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(22u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 22 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(22u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_22u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_22u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_22u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_22u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 22u; // 16 values * 22 bits = 44 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 22u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 22-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 44 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 11 dwords (44 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(22u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 22 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(22u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_22u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_22u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_22u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_22u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 22u; // 16 values * 22 bits = 44 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 22u, dst_ptr);
    }
    return src_ptr;
}
+
// ********************** 23u ****************************** //
// Dispatcher declaration: unpack 23-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 23-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 23-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 23 bits = 46 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 23 16-bit words (46 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(23u);
        // Per-lane mask that keeps the low 23 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(23u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_23u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_23u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_23u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_23u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 23u; // 16 values * 23 bits = 46 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 23u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 23-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 46 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 23 16-bit words (46 bytes) holding 16 values.
        __mmask32 read_mask = OWN_BIT_MASK(23u);
        // Per-lane mask that keeps the low 23 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(23u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_23u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_23u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_23u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_23u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 23u; // 16 values * 23 bits = 46 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 23u, dst_ptr);
    }
    return src_ptr;
}
+
// ********************** 24u ****************************** //
// Dispatcher declaration: unpack 24-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 24-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
// Unpack 24-bit packed values into uint32_t outputs.
// AVX-512 fast path handles 16 values (16 * 24 bits = 48 bytes) per
// iteration using precomputed permute/shift tables; the scalar
// unpack_Nu32u finishes the last < 16 values.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 12 dwords (48 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(24u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 24 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(24u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_24u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_24u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_24u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_24u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            _mm512_storeu_si512(dst_ptr, zmm[0]);

            src_ptr += 2u * 24u; // 16 values * 24 bits = 48 bytes consumed
            dst_ptr += 16u;
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 24u, dst_ptr);
    }
    return src_ptr;
}
+
// Unpack 24-bit packed values into uint64_t outputs.
// Same AVX-512 unpack as the uint32_t variant (16 values = 48 bytes per
// iteration), then each 32-bit result is widened to 64 bits on store.
// Returns the input position just past the consumed bytes.
template <>
inline const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
    if (values_to_read >= 16u) {
        // Masked load of the 12 dwords (48 bytes) holding 16 values.
        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(24u * OWN_WORD_WIDTH));
        // Per-lane mask that keeps the low 24 bits of each dword.
        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(24u));

        __m512i permutex_idx_ptr[2];
        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_24u_0);
        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_24u_1);

        __m512i shift_mask_ptr[2];
        shift_mask_ptr[0] = _mm512_load_si512(shift_table_24u_0);
        shift_mask_ptr[1] = _mm512_load_si512(shift_table_24u_1);

        while (values_to_read >= 16u) {
            __m512i srcmm, zmm[2];

            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);

            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);

            // shifting elements so they start from the start of the word
            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);

            // gathering even and odd elements together (0xAAAA selects the odd lanes from zmm[1])
            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);

            // Widen the 16 results to 64-bit and store; note there is no
            // explicit dst_ptr += 16 here — extend_32u64u presumably advances
            // dst_ptr (reference parameter); confirm in extend_16u.h.
            extend_32u64u(zmm[0], dst_ptr);

            src_ptr += 2u * 24u; // 16 values * 24 bits = 48 bytes consumed
            values_to_read -= 16u;
        }
    }

    // Scalar tail for the remaining < 16 values.
    if (values_to_read > 0) {
        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 24u, dst_ptr);
    }
    return src_ptr;
}
+
// ********************** 25u ****************************** //
// Dispatcher declaration: unpack 25-bit packed values into OutType outputs.
template <typename OutType>
const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);

// uint8_t cannot hold a 25-bit value; this specialization is a no-op stub
// (returns the input pointer unchanged) kept so generic dispatch code
// instantiates — presumably never reached at runtime; confirm with callers.
template <>
inline const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
    return src_ptr;
}
+
+template <>
+inline const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(25u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(25u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_25u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_25u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_25u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_25u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 25u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 25u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(25u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(25u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_25u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_25u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_25u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_25u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 25u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 25u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 26u ****************************** //
+// Primary template: unpack 26-bit packed values into OutType outputs.
+// Only the uint32_t / uint64_t specializations below do real work.
+template <typename OutType>
+const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): no-op stub — returns src_ptr unchanged, never writes dst_ptr.
+// Presumably unreachable for a 26-bit width; confirm callers never dispatch here.
+template <>
+inline const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// uint32_t output: 16 values per AVX-512 iteration (16 * 26 bits = 52 bytes,
+// loaded as 13 dwords); scalar unpack_Nu32u() handles the tail.
+template <>
+inline const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // Load mask covering the 13 dwords consumed per iteration.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(26u * OWN_WORD_WIDTH));
+        // Keeps only the low 26 bits of each unpacked dword.
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(26u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_26u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_26u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_26u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_26u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 26u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // Scalar fallback for the remaining (< 16) values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 26u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// uint64_t output: same kernel, zero-extended via extend_32u64u().
+// NOTE(review): dst_ptr is not advanced here, so extend_32u64u() presumably
+// advances it by reference — confirm in extend_32u.h.
+template <>
+inline const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(26u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(26u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_26u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_26u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_26u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_26u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 26u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 26u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 27u ****************************** //
+// Primary template: unpack 27-bit packed values into OutType outputs.
+// Only the uint32_t / uint64_t specializations below do real work.
+template <typename OutType>
+const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): no-op stub — returns src_ptr unchanged, never writes dst_ptr.
+// Presumably unreachable for a 27-bit width; confirm callers never dispatch here.
+template <>
+inline const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// uint32_t output: 16 values per AVX-512 iteration (16 * 27 bits = 54 bytes,
+// loaded as 27 16-bit words); scalar unpack_Nu32u() handles the tail.
+template <>
+inline const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // Load mask covering the 27 words consumed per iteration.
+        __mmask32 read_mask = OWN_BIT_MASK(27u);
+        // Keeps only the low 27 bits of each unpacked dword.
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(27u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_27u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_27u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_27u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_27u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 27u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // Scalar fallback for the remaining (< 16) values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 27u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// uint64_t output: same kernel, zero-extended via extend_32u64u().
+// NOTE(review): dst_ptr is not advanced here, so extend_32u64u() presumably
+// advances it by reference — confirm in extend_32u.h.
+template <>
+inline const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(27u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(27u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_27u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_27u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_27u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_27u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 27u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 27u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 28u ****************************** //
+// Primary template: unpack 28-bit packed values into OutType outputs.
+// Only the uint32_t / uint64_t specializations below do real work.
+template <typename OutType>
+const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): no-op stub — returns src_ptr unchanged, never writes dst_ptr.
+// Presumably unreachable for a 28-bit width; confirm callers never dispatch here.
+template <>
+inline const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// uint32_t output: 16 values per AVX-512 iteration (16 * 28 bits = 56 bytes,
+// loaded as 14 dwords); scalar unpack_Nu32u() handles the tail.
+template <>
+inline const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // Load mask covering the 14 dwords consumed per iteration.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(28u * OWN_WORD_WIDTH));
+        // Keeps only the low 28 bits of each unpacked dword.
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(28u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_28u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_28u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_28u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_28u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 28u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // Scalar fallback for the remaining (< 16) values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 28u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// uint64_t output: same kernel, zero-extended via extend_32u64u().
+// NOTE(review): dst_ptr is not advanced here, so extend_32u64u() presumably
+// advances it by reference — confirm in extend_32u.h.
+template <>
+inline const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(28u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(28u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_28u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_28u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_28u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_28u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 28u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 28u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 29u ****************************** //
+// Primary template: unpack 29-bit packed values into OutType outputs.
+// Only the uint32_t / uint64_t specializations below do real work.
+template <typename OutType>
+const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): no-op stub — returns src_ptr unchanged, never writes dst_ptr.
+// Presumably unreachable for a 29-bit width; confirm callers never dispatch here.
+template <>
+inline const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// uint32_t output: 16 values per AVX-512 iteration (16 * 29 bits = 58 bytes,
+// loaded as 29 16-bit words); scalar unpack_Nu32u() handles the tail.
+template <>
+inline const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // Load mask covering the 29 words consumed per iteration.
+        __mmask32 read_mask = OWN_BIT_MASK(29u);
+        // Keeps only the low 29 bits of each unpacked dword.
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(29u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_29u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_29u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_29u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_29u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 29u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // Scalar fallback for the remaining (< 16) values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 29u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// uint64_t output: same kernel, zero-extended via extend_32u64u().
+// NOTE(review): dst_ptr is not advanced here, so extend_32u64u() presumably
+// advances it by reference — confirm in extend_32u.h.
+template <>
+inline const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(29u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(29u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_29u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_29u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_29u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_29u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 29u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 29u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 30u ****************************** //
+// Primary template: unpack 30-bit packed values into OutType outputs.
+// Only the uint32_t / uint64_t specializations below do real work.
+template <typename OutType>
+const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): no-op stub — returns src_ptr unchanged, never writes dst_ptr.
+// Presumably unreachable for a 30-bit width; confirm callers never dispatch here.
+template <>
+inline const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// uint32_t output: 16 values per AVX-512 iteration (16 * 30 bits = 60 bytes,
+// loaded as 15 dwords); scalar unpack_Nu32u() handles the tail.
+template <>
+inline const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // Load mask covering the 15 dwords consumed per iteration.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(30u * OWN_WORD_WIDTH));
+        // Keeps only the low 30 bits of each unpacked dword.
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(30u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_30u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_30u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_30u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_30u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 30u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // Scalar fallback for the remaining (< 16) values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 30u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// uint64_t output: same kernel, zero-extended via extend_32u64u().
+// NOTE(review): dst_ptr is not advanced here, so extend_32u64u() presumably
+// advances it by reference — confirm in extend_32u.h.
+template <>
+inline const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(30u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(30u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_30u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_30u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_30u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_30u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 30u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 30u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 31u ****************************** //
+// Primary template: unpack 31-bit packed values into OutType outputs.
+// Only the uint32_t / uint64_t specializations below do real work.
+template <typename OutType>
+const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): no-op stub — returns src_ptr unchanged, never writes dst_ptr.
+// Presumably unreachable for a 31-bit width; confirm callers never dispatch here.
+template <>
+inline const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// uint32_t output: 16 values per AVX-512 iteration (16 * 31 bits = 62 bytes,
+// loaded as 31 16-bit words); scalar unpack_Nu32u() handles the tail.
+template <>
+inline const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // Load mask covering the 31 words consumed per iteration.
+        __mmask32 read_mask = OWN_BIT_MASK(31u);
+        // Keeps only the low 31 bits of each unpacked dword.
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(31u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_31u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_31u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_31u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_31u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 31u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // Scalar fallback for the remaining (< 16) values.
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 31u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// uint64_t output: same kernel, zero-extended via extend_32u64u().
+// NOTE(review): dst_ptr is not advanced here, so extend_32u64u() presumably
+// advances it by reference — confirm in extend_32u.h.
+template <>
+inline const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(31u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(31u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_31u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_31u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_31u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_31u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 31u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 31u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 32u ****************************** //
+// Primary template: unpack 32-bit (i.e. already aligned) values into OutType.
+// Only the uint32_t / uint64_t specializations do real work.
+template <typename OutType>
+const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): no-op stub — returns src_ptr unchanged, never writes dst_ptr.
+// Presumably unreachable for a 32-bit width; confirm callers never dispatch here.
+template <>
+inline const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    memcpy(dst_ptr, src_ptr, values_to_read * 4);

Review Comment:
   warning: use of undeclared identifier 'memcpy' [clang-diagnostic-error]
   ```cpp
       memcpy(dst_ptr, src_ptr, values_to_read * 4);
       ^
   ```
   



##########
be/src/util/bitpacking/unpack_32u.h:
##########
@@ -0,0 +1,1748 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "extend_32u.h"
+#include "unpack_def.h"
+
+// Lookup tables driving the AVX-512 unpack kernels below (one set per bit
+// width N in 17..31).  permutex_idx_table_Nu_{0,1} feed
+// _mm512_permutexvar_epi32 to gather the dwords holding the even-/odd-indexed
+// packed values, and shift_table_Nu_{0,1} feed the 64-bit variable shifts
+// (_mm512_srlv_epi64 / _mm512_sllv_epi64) that align each value to its
+// output lane boundary.
+// ------------------------------------ 17u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_17u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 6u, 7u, 7u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_17u_1[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 6u, 7u, 7u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_17u_0[8]) = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_17u_1[8]) = {15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
+
+// ------------------------------------ 18u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_18u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 6u, 7u, 7u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_18u_1[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_18u_0[8]) = {0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_18u_1[8]) = {14u, 10u, 6u, 2u, 30u, 26u, 22u, 18u};
+
+// ------------------------------------ 19u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_19u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      4u, 5u, 5u, 6u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_19u_1[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 4u, 5u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_19u_0[8]) = {0u, 6u, 12u, 18u, 24u, 30u, 4u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_19u_1[8]) = {13u, 7u, 1u, 27u, 21u, 15u, 9u, 3u};
+
+// ------------------------------------ 20u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_20u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 8u, 9u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_20u_1[16]) = {0u, 1u, 1u, 2u, 3u, 4u, 4u, 5u,
+                                                                      5u, 6u, 6u, 7u, 8u, 9u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_20u_0[8]) = {0u, 8u, 16u, 24u, 0u, 8u, 16u, 24u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_20u_1[8]) = {12u, 4u, 28u, 20u, 12u, 4u, 28u, 20u};
+
+// ------------------------------------ 21u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_21u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 3u, 4u,
+                                                                      5u, 6u, 6u, 7u, 7u, 8u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_21u_1[16]) = {0u, 1u, 1u, 2u, 3u, 4u, 4u, 5u,
+                                                                      5u, 6u, 7u, 8u, 8u, 9u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_21u_0[8]) = {0u, 10u, 20u, 30u, 8u, 18u, 28u, 6u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_21u_1[8]) = {11u, 1u, 23u, 13u, 3u, 25u, 15u, 5u};
+
+// ------------------------------------ 22u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_22u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 4u, 5u,
+                                                                      5u, 6u, 6u, 7u, 8u, 9u, 9u, 10u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_22u_1[16]) = {0u, 1u, 2u, 3u, 3u, 4u, 4u,  5u,
+                                                                      6u, 7u, 7u, 8u, 8u, 9u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_22u_0[8]) = {0u, 12u, 24u, 4u, 16u, 28u, 8u, 20u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_22u_1[8]) = {10u, 30u, 18u, 6u, 26u, 14u, 2u, 22u};
+
+// ------------------------------------ 23u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_23u_0[16]) = {0u, 1u, 1u, 2u, 2u, 3u, 4u,  5u,
+                                                                      5u, 6u, 7u, 8u, 8u, 9u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_23u_1[16]) = {0u, 1u, 2u, 3u, 3u, 4u,  5u,  6u,
+                                                                      6u, 7u, 7u, 8u, 9u, 10u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_23u_0[8]) = {0u, 14u, 28u, 10u, 24u, 6u, 20u, 2u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_23u_1[8]) = {9u, 27u, 13u, 31u, 17u, 3u, 21u, 7u};
+
+// ------------------------------------ 24u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_24u_0[16]) = {0u, 1u, 1u, 2u, 3u, 4u,  4u,  5u,
+                                                                      6u, 7u, 7u, 8u, 9u, 10u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_24u_1[16]) = {0u, 1u, 2u, 3u, 3u, 4u,  5u,  6u,
+                                                                      6u, 7u, 8u, 9u, 9u, 10u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_24u_0[8]) = {0u, 16u, 0u, 16u, 0u, 16u, 0u, 16u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_24u_1[8]) = {8u, 24u, 8u, 24u, 8u, 24u, 8u, 24u};
+
+// ------------------------------------ 25u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_25u_0[16]) = {0u, 1u, 1u, 2u, 3u, 4u,  4u,  5u,
+                                                                      6u, 7u, 7u, 8u, 9u, 10u, 10u, 11u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_25u_1[16]) = {0u, 1u, 2u, 3u, 3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 8u, 9u, 10u, 11u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_25u_0[8]) = {0u, 18u, 4u, 22u, 8u, 26u, 12u, 30u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_25u_1[8]) = {7u, 21u, 3u, 17u, 31u, 13u, 27u, 9u};
+
+// ------------------------------------ 26u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_26u_0[16]) = {0u, 1u, 1u, 2u, 3u, 4u,  4u,  5u,
+                                                                      6u, 7u, 8u, 9u, 9u, 10u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_26u_1[16]) = {0u, 1u, 2u, 3u, 4u,  5u,  5u,  6u,
+                                                                      7u, 8u, 8u, 9u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_26u_0[8]) = {0u, 20u, 8u, 28u, 16u, 4u, 24u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_26u_1[8]) = {6u, 18u, 30u, 10u, 22u, 2u, 14u, 26u};
+
+// ------------------------------------ 27u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_27u_0[16]) = {0u, 1u, 1u, 2u, 3u,  4u,  5u,  6u,
+                                                                      6u, 7u, 8u, 9u, 10u, 11u, 11u, 12u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_27u_1[16]) = {0u, 1u, 2u, 3u,  4u,  5u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_27u_0[8]) = {0u, 22u, 12u, 2u, 24u, 14u, 4u, 26u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_27u_1[8]) = {5u, 15u, 25u, 3u, 13u, 23u, 1u, 11u};
+
+// ------------------------------------ 28u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_28u_0[16]) = {0u, 1u, 1u, 2u, 3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 8u, 9u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_28u_1[16]) = {0u, 1u, 2u, 3u,  4u,  5u,  6u,  7u,
+                                                                      7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_28u_0[8]) = {0u, 24u, 16u, 8u, 0u, 24u, 16u, 8u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_28u_1[8]) = {4u, 12u, 20u, 28u, 4u, 12u, 20u, 28u};
+
+// ------------------------------------ 29u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_29u_0[16]) = {0u, 1u, 1u, 2u,  3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 10u, 11u, 12u, 13u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_29u_1[16]) = {0u, 1u, 2u, 3u,  4u,  5u,  6u,  7u,
+                                                                      8u, 9u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_29u_0[8]) = {0u, 26u, 20u, 14u, 8u, 2u, 28u, 22u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_29u_1[8]) = {3u, 9u, 15u, 21u, 27u, 1u, 7u, 13u};
+
+// ------------------------------------ 30u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_30u_0[16]) = {0u, 1u, 1u, 2u,  3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_30u_1[16]) = {0u, 1u, 2u,  3u,  4u,  5u,  6u,  7u,
+                                                                      8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_30u_0[8]) = {0u, 28u, 24u, 20u, 16u, 12u, 8u, 4u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_30u_1[8]) = {2u, 6u, 10u, 14u, 18u, 22u, 26u, 30u};
+
+// ------------------------------------ 31u -----------------------------------------
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_31u_0[16]) = {0u, 1u, 1u, 2u,  3u,  4u,  5u,  6u,
+                                                                      7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u};
+OWN_ALIGNED_64_ARRAY(static uint32_t permutex_idx_table_31u_1[16]) = {0u, 1u, 2u,  3u,  4u,  5u,  6u,  7u,
+                                                                      8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_31u_0[8]) = {0u, 30u, 28u, 26u, 24u, 22u, 20u, 18u};
+OWN_ALIGNED_64_ARRAY(static uint64_t shift_table_31u_1[8]) = {1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
+
+// Scalar (non-SIMD) fallback: unpacks `values_to_read` values of `bit_width`
+// bits each from the packed little-endian bit stream at `src_ptr` into
+// `dst_ptr`.  Returns a pointer just past the last input byte consumed, so
+// callers can continue reading the stream.
+template <typename OutType>
+const uint8_t* unpack_Nu32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width, OutType* dst_ptr);
+
+template <>
+inline const uint8_t* unpack_Nu32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint32_t* dst_ptr) {
+    // mask selecting the low `bit_width` bits of the 64-bit bit buffer
+    uint64_t mask = OWN_BIT_MASK(bit_width);
+    uint64_t next_dword;
+    // NOTE(review): casting uint8_t* to uint32_t* assumes unaligned 32-bit
+    // loads are acceptable on the target (true on x86) — confirm if this
+    // header is ever built for an alignment-strict architecture.
+    uint32_t* src32u_ptr = (uint32_t*)src_ptr;
+    uint8_t* src8u_ptr = (uint8_t*)src_ptr;
+    uint32_t* dst32u_ptr = (uint32_t*)dst_ptr;
+    // number of not-yet-consumed bits currently held in `src`
+    uint32_t bits_in_buf = 0u;
+    // 64-bit bit buffer; values are extracted from its low end
+    uint64_t src = 0u;
+
+    // Fast path: refill the bit buffer a dword (32 bits) at a time.  It
+    // deliberately stops while 2 values remain so the byte-granular tail
+    // loop below cannot read past the end of the packed input.
+    if (2u < values_to_read) {
+        bits_in_buf = OWN_DWORD_WIDTH;
+        src = (uint64_t)(*src32u_ptr);
+
+        src32u_ptr++;
+
+        while (2u < values_to_read) {
+            // refill when the buffer holds fewer than `bit_width` bits
+            if (bit_width > bits_in_buf) {
+                next_dword = (uint64_t)(*src32u_ptr);
+                src32u_ptr++;
+                next_dword = next_dword << bits_in_buf;
+                src = src | next_dword;
+                bits_in_buf += OWN_DWORD_WIDTH;
+            }
+            *dst32u_ptr = (uint32_t)(src & mask);
+            src = src >> bit_width;
+            bits_in_buf -= bit_width;
+            dst32u_ptr++;
+            values_to_read--;
+        }
+
+        src8u_ptr = (uint8_t*)src32u_ptr;
+    }
+
+    // Tail: refill one byte at a time to avoid over-reading the input.
+    while (0u < values_to_read) {
+        while (bit_width > bits_in_buf) {
+            next_dword = (uint64_t)(*src8u_ptr);
+            src8u_ptr++;
+            next_dword = next_dword << bits_in_buf;
+            src = src | next_dword;
+            bits_in_buf += OWN_BYTE_WIDTH;
+        }
+        *dst32u_ptr = (uint32_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst32u_ptr++;
+        values_to_read--;
+    }
+    return src8u_ptr;
+}
+
+// 64-bit output variant of the scalar fallback: identical extraction logic
+// to the uint32_t specialization above, but each unpacked value is
+// zero-extended into a uint64_t destination.  Returns a pointer just past
+// the last input byte consumed.
+template <>
+inline const uint8_t* unpack_Nu32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t bit_width,
+                                   uint64_t* dst_ptr) {
+    uint64_t mask = OWN_BIT_MASK(bit_width);
+    uint64_t next_dword;
+    uint32_t* src32u_ptr = (uint32_t*)src_ptr;
+    uint8_t* src8u_ptr = (uint8_t*)src_ptr;
+    uint64_t* dst64u_ptr = (uint64_t*)dst_ptr;
+    uint32_t bits_in_buf = 0u;
+    uint64_t src = 0u;
+
+    // dword-granular fast path; stops with 2 values left so the tail loop
+    // never reads beyond the packed input
+    if (2u < values_to_read) {
+        bits_in_buf = OWN_DWORD_WIDTH;
+        src = (uint64_t)(*src32u_ptr);
+
+        src32u_ptr++;
+
+        while (2u < values_to_read) {
+            if (bit_width > bits_in_buf) {
+                next_dword = (uint64_t)(*src32u_ptr);
+                src32u_ptr++;
+                next_dword = next_dword << bits_in_buf;
+                src = src | next_dword;
+                bits_in_buf += OWN_DWORD_WIDTH;
+            }
+            *dst64u_ptr = (uint64_t)(src & mask);
+            src = src >> bit_width;
+            bits_in_buf -= bit_width;
+            dst64u_ptr++;
+            values_to_read--;
+        }
+
+        src8u_ptr = (uint8_t*)src32u_ptr;
+    }
+
+    // byte-granular tail
+    while (0u < values_to_read) {
+        while (bit_width > bits_in_buf) {
+            next_dword = (uint64_t)(*src8u_ptr);
+            src8u_ptr++;
+            next_dword = next_dword << bits_in_buf;
+            src = src | next_dword;
+            bits_in_buf += OWN_BYTE_WIDTH;
+        }
+        *dst64u_ptr = (uint64_t)(src & mask);
+        src = src >> bit_width;
+        bits_in_buf -= bit_width;
+        dst64u_ptr++;
+        values_to_read--;
+    }
+    return src8u_ptr;
+}
+
+// ********************** 17u ****************************** //
+// AVX-512 unpack of 17-bit packed values: each loop iteration consumes
+// 16 values = 16 * 17 bits = 34 bytes of input and produces 16 outputs;
+// any remaining (< 16) values fall back to the scalar unpack_Nu32u.
+template <typename OutType>
+const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// 17-bit values cannot fit an 8-bit output: no-op stub — presumably never
+// dispatched at runtime; confirm at the call site.
+template <>
+inline const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output variant.
+template <>
+inline const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // word-granular load mask: 17 x 16-bit words = the 34 input bytes
+        // holding one batch of 16 values
+        __mmask32 read_mask = OWN_BIT_MASK(17u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(17u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_17u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_17u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_17u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_17u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 17u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar fallback for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 17u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output variant: same extraction, then widened to uint64_t.
+template <>
+inline const uint8_t* unpack_17u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(17u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(17u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_17u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_17u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_17u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_17u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // NOTE(review): dst_ptr is not advanced in this loop, so
+            // extend_32u64u presumably takes it by reference and advances it
+            // by 16 — confirm in extend_32u.h, otherwise the tail call below
+            // would overwrite the SIMD results.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 17u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 17u, dst_ptr);
+    }
+    return src_ptr;
+}
+// ********************** 18u ****************************** //
+// AVX-512 unpack of 18-bit packed values: 16 values = 36 bytes of input per
+// iteration; the (< 16)-value tail uses the scalar unpack_Nu32u fallback.
+
+template <typename OutType>
+const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// 18-bit values cannot fit an 8-bit output: no-op stub — presumably never
+// dispatched at runtime; confirm at the call site.
+template <>
+inline const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output variant.
+template <>
+inline const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // dword-granular load mask: 9 dwords = 36 bytes = 16 values * 18 bits
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(18u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(18u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_18u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_18u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_18u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_18u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 18u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar fallback for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 18u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output variant: same extraction, then widened to uint64_t.
+template <>
+inline const uint8_t* unpack_18u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(18u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(18u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_18u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_18u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_18u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_18u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // NOTE(review): dst_ptr is not advanced here — presumably
+            // extend_32u64u advances it by 16 (by-reference); confirm in
+            // extend_32u.h.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 18u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 18u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 19u ****************************** //
+// AVX-512 unpack of 19-bit packed values: 16 values = 38 bytes of input per
+// iteration; the (< 16)-value tail uses the scalar unpack_Nu32u fallback.
+template <typename OutType>
+const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// 19-bit values cannot fit an 8-bit output: no-op stub — presumably never
+// dispatched at runtime; confirm at the call site.
+template <>
+inline const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output variant.
+template <>
+inline const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // word-granular load mask: 19 x 16-bit words = 38 bytes = 16 * 19 bits
+        __mmask32 read_mask = OWN_BIT_MASK(19u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(19u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_19u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_19u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_19u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_19u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 19u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar fallback for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 19u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output variant: same extraction, then widened to uint64_t.
+template <>
+inline const uint8_t* unpack_19u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(19u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(19u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_19u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_19u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_19u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_19u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // NOTE(review): dst_ptr is not advanced here — presumably
+            // extend_32u64u advances it by 16 (by-reference); confirm in
+            // extend_32u.h.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 19u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 19u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 20u ****************************** //
+// AVX-512 unpack of 20-bit packed values: 16 values = 40 bytes of input per
+// iteration; the (< 16)-value tail uses the scalar unpack_Nu32u fallback.
+template <typename OutType>
+const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// 20-bit values cannot fit an 8-bit output: no-op stub — presumably never
+// dispatched at runtime; confirm at the call site.
+template <>
+inline const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output variant.
+template <>
+inline const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // dword-granular load mask: 10 dwords = 40 bytes = 16 values * 20 bits
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(20u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(20u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_20u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_20u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_20u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_20u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 20u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar fallback for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 20u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output variant: same extraction, then widened to uint64_t.
+template <>
+inline const uint8_t* unpack_20u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(20u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(20u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_20u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_20u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_20u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_20u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // NOTE(review): dst_ptr is not advanced here — presumably
+            // extend_32u64u advances it by 16 (by-reference); confirm in
+            // extend_32u.h.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 20u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 20u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 21u ****************************** //
+// AVX-512 unpack of 21-bit packed values: 16 values = 42 bytes of input per
+// iteration; the (< 16)-value tail uses the scalar unpack_Nu32u fallback.
+template <typename OutType>
+const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// 21-bit values cannot fit an 8-bit output: no-op stub — presumably never
+// dispatched at runtime; confirm at the call site.
+template <>
+inline const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output variant.
+template <>
+inline const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // word-granular load mask: 21 x 16-bit words = 42 bytes = 16 * 21 bits
+        __mmask32 read_mask = OWN_BIT_MASK(21u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(21u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_21u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_21u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_21u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_21u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 21u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar fallback for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 21u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_21u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(21u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(21u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_21u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_21u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_21u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_21u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 21u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 21u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 22u ****************************** //
+// Unpacks `values_to_read` 22-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 22-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 22 bits = 44 bytes per iteration (matches
+        // src_ptr += 2u * 22u below); a whole number of dwords, so the
+        // input is loaded with a dword mask here.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(22u * OWN_WORD_WIDTH));
+        // keeps only the low 22 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(22u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_22u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_22u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_22u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_22u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 22u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 22u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_22u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 22 bits = 44 bytes per iteration; dword-masked load.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(22u * OWN_WORD_WIDTH))
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(22u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_22u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_22u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_22u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_22u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 22u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 22u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 23u ****************************** //
+// Unpacks `values_to_read` 23-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 23-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 23 bits = 23 16-bit words = 46 bytes per iteration
+        // (matches src_ptr += 2u * 23u below); loaded with a word mask.
+        __mmask32 read_mask = OWN_BIT_MASK(23u);
+        // keeps only the low 23 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(23u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_23u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_23u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_23u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_23u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 23u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 23u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_23u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 23 bits = 46 bytes per iteration; word-masked load.
+        __mmask32 read_mask = OWN_BIT_MASK(23u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(23u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_23u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_23u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_23u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_23u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 23u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 23u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 24u ****************************** //
+// Unpacks `values_to_read` 24-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 24-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 24 bits = 48 bytes per iteration (matches
+        // src_ptr += 2u * 24u below); a whole number of dwords, so the
+        // input is loaded with a dword mask here.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(24u * OWN_WORD_WIDTH));
+        // keeps only the low 24 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(24u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_24u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_24u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_24u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_24u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 24u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 24u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_24u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 24 bits = 48 bytes per iteration; dword-masked load.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(24u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(24u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_24u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_24u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_24u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_24u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 24u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 24u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 25u ****************************** //
+// Unpacks `values_to_read` 25-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 25-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 25 bits = 25 16-bit words = 50 bytes per iteration
+        // (matches src_ptr += 2u * 25u below); loaded with a word mask.
+        __mmask32 read_mask = OWN_BIT_MASK(25u);
+        // keeps only the low 25 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(25u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_25u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_25u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_25u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_25u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 25u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 25u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_25u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 25 bits = 50 bytes per iteration; word-masked load.
+        __mmask32 read_mask = OWN_BIT_MASK(25u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(25u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_25u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_25u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_25u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_25u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 25u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 25u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 26u ****************************** //
+// Unpacks `values_to_read` 26-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 26-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 26 bits = 52 bytes per iteration (matches
+        // src_ptr += 2u * 26u below); a whole number of dwords, so the
+        // input is loaded with a dword mask here.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(26u * OWN_WORD_WIDTH));
+        // keeps only the low 26 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(26u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_26u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_26u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_26u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_26u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 26u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 26u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_26u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 26 bits = 52 bytes per iteration; dword-masked load.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(26u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(26u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_26u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_26u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_26u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_26u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 26u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 26u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 27u ****************************** //
+// Unpacks `values_to_read` 27-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 27-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 27 bits = 27 16-bit words = 54 bytes per iteration
+        // (matches src_ptr += 2u * 27u below); loaded with a word mask.
+        __mmask32 read_mask = OWN_BIT_MASK(27u);
+        // keeps only the low 27 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(27u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_27u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_27u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_27u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_27u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 27u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 27u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_27u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 27 bits = 54 bytes per iteration; word-masked load.
+        __mmask32 read_mask = OWN_BIT_MASK(27u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(27u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_27u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_27u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_27u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_27u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 27u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 27u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 28u ****************************** //
+// Unpacks `values_to_read` 28-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 28-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 28 bits = 56 bytes per iteration (matches
+        // src_ptr += 2u * 28u below); a whole number of dwords, so the
+        // input is loaded with a dword mask here.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(28u * OWN_WORD_WIDTH));
+        // keeps only the low 28 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(28u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_28u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_28u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_28u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_28u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 28u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 28u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_28u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 28 bits = 56 bytes per iteration; dword-masked load.
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(28u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(28u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_28u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_28u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_28u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_28u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 28u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 28u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 29u ****************************** //
+// Unpacks `values_to_read` 29-bit packed values from src_ptr into full-width
+// output words and returns the pointer just past the last input byte consumed.
+template <typename OutType>
+const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+// NOTE(review): intentional no-op stub -- a 29-bit value cannot fit in uint8_t.
+// Returns src_ptr unchanged without consuming input or writing output.
+template <>
+inline const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+// 32-bit output: AVX-512 fast path, 16 values per loop iteration.
+template <>
+inline const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 29 bits = 29 16-bit words = 58 bytes per iteration
+        // (matches src_ptr += 2u * 29u below); loaded with a word mask.
+        __mmask32 read_mask = OWN_BIT_MASK(29u);
+        // keeps only the low 29 bits of each unpacked dword
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(29u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_29u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_29u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_29u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_29u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 29u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 29u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// 64-bit output: same unpack as the 32-bit path, then each dword lane is
+// widened to 64 bits via extend_32u64u.
+template <>
+inline const uint8_t* unpack_29u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        // 16 values * 29 bits = 58 bytes per iteration; word-masked load.
+        __mmask32 read_mask = OWN_BIT_MASK(29u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(29u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_29u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_29u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_29u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_29u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            // Widen 16 unpacked 32-bit lanes to uint64_t outputs.  NOTE(review):
+            // dst_ptr is not advanced in this loop, so extend_32u64u presumably
+            // takes it by reference and advances it -- confirm in the extend_* header.
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 29u;
+            values_to_read -= 16u;
+        }
+    }
+
+    // scalar tail for the remaining (< 16) values
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 29u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 30u ****************************** //
+template <typename OutType>
+const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+template <>
+inline const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(30u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(30u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_30u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_30u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_30u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_30u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 30u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 30u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_30u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask16 read_mask = OWN_BIT_MASK(OWN_BITS_2_DWORD(30u * OWN_WORD_WIDTH));
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(30u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_30u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_30u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_30u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_30u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi32(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 30u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 30u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 31u ****************************** //
+template <typename OutType>
+const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+template <>
+inline const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(31u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(31u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_31u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_31u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_31u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_31u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            _mm512_storeu_si512(dst_ptr, zmm[0]);
+
+            src_ptr += 2u * 31u;
+            dst_ptr += 16u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 31u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_31u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint64_t* dst_ptr) {
+    if (values_to_read >= 16u) {
+        __mmask32 read_mask = OWN_BIT_MASK(31u);
+        __m512i parse_mask0 = _mm512_set1_epi32(OWN_BIT_MASK(31u));
+
+        __m512i permutex_idx_ptr[2];
+        permutex_idx_ptr[0] = _mm512_load_si512(permutex_idx_table_31u_0);
+        permutex_idx_ptr[1] = _mm512_load_si512(permutex_idx_table_31u_1);
+
+        __m512i shift_mask_ptr[2];
+        shift_mask_ptr[0] = _mm512_load_si512(shift_table_31u_0);
+        shift_mask_ptr[1] = _mm512_load_si512(shift_table_31u_1);
+
+        while (values_to_read >= 16u) {
+            __m512i srcmm, zmm[2];
+
+            srcmm = _mm512_maskz_loadu_epi16(read_mask, src_ptr);
+
+            // permuting so in zmm[0] will be elements with even indexes and in zmm[1] - with odd ones
+            zmm[0] = _mm512_permutexvar_epi32(permutex_idx_ptr[0], srcmm);
+            zmm[1] = _mm512_permutexvar_epi32(permutex_idx_ptr[1], srcmm);
+
+            // shifting elements so they start from the start of the word
+            zmm[0] = _mm512_srlv_epi64(zmm[0], shift_mask_ptr[0]);
+            zmm[1] = _mm512_sllv_epi64(zmm[1], shift_mask_ptr[1]);
+
+            // gathering even and odd elements together
+            zmm[0] = _mm512_mask_mov_epi32(zmm[0], 0xAAAA, zmm[1]);
+            zmm[0] = _mm512_and_si512(zmm[0], parse_mask0);
+
+            extend_32u64u(zmm[0], dst_ptr);
+
+            src_ptr += 2u * 31u;
+            values_to_read -= 16u;
+        }
+    }
+
+    if (values_to_read > 0) {
+        src_ptr = unpack_Nu32u(src_ptr, values_to_read, 31u, dst_ptr);
+    }
+    return src_ptr;
+}
+
+// ********************** 32u ****************************** //
+template <typename OutType>
+const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
+
+template <>
+inline const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint8_t* dst_ptr) {
+    return src_ptr;
+}
+
+template <>
+inline const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, uint32_t* dst_ptr) {

Review Comment:
   warning: pointer parameter 'dst_ptr' can be pointer to const [readability-non-const-parameter]
   
   be/src/util/bitpacking/unpack_32u.h:1716:
   ```diff
   - const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, OutType* dst_ptr);
   + const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, const OutType* dst_ptr);
   ```
   
   ```suggestion
   inline const uint8_t* unpack_32u32u(const uint8_t* src_ptr, uint32_t values_to_read, const uint32_t* dst_ptr) {
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
+#define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */
+#define OWN_WORD_WIDTH 16u                                 /**< Word width in bits */
+#define OWN_3_BYTE_WIDTH 24u                               /**< 3-byte width in bits */
+#define OWN_DWORD_WIDTH 32u                                /**< Dword width in bits */
+#define OWN_6_BYTE_WIDTH 48u                               /**< 6-byte width in bits */
+#define OWN_7_BYTE_WIDTH 56u                               /**< 7-byte width in bits */
+#define OWN_QWORD_WIDTH 64u                                /**< Qword width in bits */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_QWORD_WIDTH 64u                                /**< Qword width in bits */
           ^
   ```
   



##########
be/src/util/bitpacking/unpack_def.h:
##########
@@ -0,0 +1,92 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <immintrin.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define OWN_ALIGNED_ARRAY(array_declaration, alignment) array_declaration __attribute__((aligned(alignment)))
+#define OWN_ALIGNED_64_ARRAY(array_declaration) OWN_ALIGNED_ARRAY(array_declaration, 64u)
+
+/**
+ * @brief Defines internal inline Intel core function
+ */
+
+#define OWN_MAX_16U 0xFFFF                                 /**< Max value for uint16_t */
+#define OWN_MAX_32U 0xFFFFFFFF                             /**< Max value for uint32_t */
+#define OWN_1_BIT_MASK 1u                                  /**< Mask for 1-bit integer */
+#define OWN_2_BIT_MASK 3u                                  /**< Mask for 2-bit integer */
+#define OWN_3_BIT_MASK 7u                                  /**< Mask for 3-bit integer */
+#define OWN_4_BIT_MASK 0xfu                                /**< Mask for 4-bit integer */
+#define OWN_5_BIT_MASK 0x1fu                               /**< Mask for 5-bit integer */
+#define OWN_6_BIT_MASK 0x3fu                               /**< Mask for 6-bit integer */
+#define OWN_7_BIT_MASK 0x7fu                               /**< Mask for 7-bit integer */
+#define OWN_HIGH_BIT_MASK 0x80u                            /**< Mask for most significant bit in a byte */
+#define OWN_LOW_BIT_MASK 1u                                /**< Mask for least significant bit in a byte */
+#define OWN_BYTE_WIDTH 8u                                  /**< Byte width in bits */
+#define OWN_WORD_WIDTH 16u                                 /**< Word width in bits */
+#define OWN_3_BYTE_WIDTH 24u                               /**< 3-byte width in bits */
+#define OWN_DWORD_WIDTH 32u                                /**< Dword width in bits */
+#define OWN_6_BYTE_WIDTH 48u                               /**< 6-byte width in bits */
+#define OWN_7_BYTE_WIDTH 56u                               /**< 7-byte width in bits */

Review Comment:
   warning: macro is not used [clang-diagnostic-unused-macros]
   ```cpp
   #define OWN_7_BYTE_WIDTH 56u                               /**< 7-byte width in bits */
           ^
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org