You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/08/03 04:00:14 UTC

[incubator-doris] branch master updated: [Bug] fix violating C/C++ aliasing rules cause a error hash value in decimal value (#6348)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 16bc5fa  [Bug] fix violating C/C++ aliasing rules cause a error hash value in decimal value (#6348)
16bc5fa is described below

commit 16bc5fa5852f868464c05d743c35fb2a8f80f5f1
Author: stdpain <34...@users.noreply.github.com>
AuthorDate: Tue Aug 3 12:00:03 2021 +0800

    [Bug] fix violating C/C++ aliasing rules cause a error hash value in decimal value (#6348)
    
    In RuntimeFilter BloomFilter, decimal column will got a wrong hash value because violating  aliasing rules
    decimal12_t decimal = { 12, 12 };
    murmurhash3(decimal) in bloom filter: 2167721464
    expect: 4203026776
---
 be/src/exec/olap_scanner.cpp                       | 17 ++++++------
 be/src/exprs/bloomfilter_predicate.h               | 32 ++++++++++++++--------
 .../segment_v2/bloom_filter_index_writer.cpp       | 12 ++++----
 3 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 476f032..ebe2200 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -21,7 +21,9 @@
 #include <string>
 
 #include "gen_cpp/PaloInternalService_types.h"
+#include "olap/decimal12.h"
 #include "olap/field.h"
+#include "olap/uint24.h"
 #include "olap_scan_node.h"
 #include "olap_utils.h"
 #include "runtime/descriptors.h"
@@ -451,9 +453,10 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
         }
         case TYPE_DECIMALV2: {
             DecimalV2Value* slot = tuple->get_decimalv2_slot(slot_desc->tuple_offset());
+            auto packed_decimal = *reinterpret_cast<const decimal12_t*>(ptr);
 
-            int64_t int_value = *(int64_t*)(ptr);
-            int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t));
+            int64_t int_value = packed_decimal.integer;
+            int32_t frac_value = packed_decimal.fraction;
             if (!slot->from_olap_decimal(int_value, frac_value)) {
                 tuple->set_null(slot_desc->null_indicator_offset());
             }
@@ -469,12 +472,10 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
         }
         case TYPE_DATE: {
             DateTimeValue* slot = tuple->get_datetime_slot(slot_desc->tuple_offset());
-            uint64_t value = 0;
-            value = *(unsigned char*)(ptr + 2);
-            value <<= 8;
-            value |= *(unsigned char*)(ptr + 1);
-            value <<= 8;
-            value |= *(unsigned char*)(ptr);
+
+            uint24_t date = *reinterpret_cast<const uint24_t*>(ptr);
+            uint64_t value = uint32_t(date);
+
             if (!slot->from_olap_date(value)) {
                 tuple->set_null(slot_desc->null_indicator_offset());
             }
diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h
index 8fbb916..492c687 100644
--- a/be/src/exprs/bloomfilter_predicate.h
+++ b/be/src/exprs/bloomfilter_predicate.h
@@ -26,7 +26,9 @@
 #include "exprs/block_bloom_filter.hpp"
 #include "exprs/predicate.h"
 #include "olap/bloom_filter.hpp"
+#include "olap/decimal12.h"
 #include "olap/rowset/segment_v2/bloom_filter.h"
+#include "olap/uint24.h"
 #include "runtime/mem_tracker.h"
 #include "runtime/raw_value.h"
 
@@ -223,30 +225,38 @@ struct DateTimeFindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
     }
 };
 
+// avoid violating C/C++ aliasing rules.
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101684
+
 template <class BloomFilterAdaptor>
 struct DateFindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
     bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
-        uint64_t value = 0;
-        value = *(unsigned char*)((char*)data + 2);
-        value <<= 8;
-        value |= *(unsigned char*)((char*)data + 1);
-        value <<= 8;
-        value |= *(unsigned char*)((char*)data);
+        uint24_t date = *static_cast<const uint24_t*>(data);
+        uint64_t value = uint32_t(date);
+
         DateTimeValue date_value;
         date_value.from_olap_date(value);
         date_value.to_datetime();
-        return bloom_filter.test_bytes((char*)&date_value, sizeof(DateTimeValue));
+
+        char data_bytes[sizeof(date_value)];
+        memcpy(&data_bytes, &date_value, sizeof(date_value));
+        return bloom_filter.test_bytes(data_bytes, sizeof(DateTimeValue));
     }
 };
 
 template <class BloomFilterAdaptor>
-struct DecimalV2FindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
+struct DecimalV2FindOp : public CommonFindOp<DecimalV2Value, BloomFilterAdaptor> {
     bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
+        auto packed_decimal = *static_cast<const decimal12_t*>(data);
         DecimalV2Value value;
-        int64_t int_value = *(int64_t*)(data);
-        int32_t frac_value = *(int32_t*)((char*)data + sizeof(int64_t));
+        int64_t int_value = packed_decimal.integer;
+        int32_t frac_value = packed_decimal.fraction;
         value.from_olap_decimal(int_value, frac_value);
-        return bloom_filter.test_bytes((char*)&value, sizeof(DecimalV2Value));
+
+        constexpr int decimal_value_sz = sizeof(DecimalV2Value);
+        char data_bytes[decimal_value_sz];
+        memcpy(&data_bytes, &value, decimal_value_sz);
+        return bloom_filter.test_bytes(data_bytes, decimal_value_sz);
     }
 };
 
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 30eaf80..4c031c9 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -48,14 +48,12 @@ struct BloomFilterTraits<Slice> {
 };
 
 struct Int128Comparator {
-    bool operator()(const PackedInt128& a, const PackedInt128& b) const {
-        return a.value < b.value;
-    }
+    bool operator()(const int128_t& a, const int128_t& b) const { return a < b; }
 };
 
 template <>
 struct BloomFilterTraits<int128_t> {
-    using ValueDict = std::set<PackedInt128, Int128Comparator>;
+    using ValueDict = std::set<int128_t, Int128Comparator>;
 };
 
 // Builder for bloom filter. In doris, bloom filter index is used in
@@ -90,9 +88,9 @@ public:
                     _typeinfo->deep_copy(&new_value, v, &_pool);
                     _values.insert(new_value);
                 } else if constexpr (_is_int128()) {
-                    PackedInt128 new_value;
-                    memcpy(&new_value.value, v, sizeof(PackedInt128));
-                    _values.insert((*reinterpret_cast<CppType*>(&new_value)));
+                    int128_t new_value;
+                    memcpy(&new_value, v, sizeof(PackedInt128));
+                    _values.insert(new_value);
                 } else {
                     _values.insert(*v);
                 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org