You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/08/03 04:00:14 UTC
[incubator-doris] branch master updated: [Bug] fix violating C/C++
aliasing rules cause a error hash value in decimal value (#6348)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 16bc5fa [Bug] fix violating C/C++ aliasing rules cause a error hash value in decimal value (#6348)
16bc5fa is described below
commit 16bc5fa5852f868464c05d743c35fb2a8f80f5f1
Author: stdpain <34...@users.noreply.github.com>
AuthorDate: Tue Aug 3 12:00:03 2021 +0800
[Bug] fix violating C/C++ aliasing rules cause a error hash value in decimal value (#6348)
In RuntimeFilter BloomFilter, decimal column will got a wrong hash value because violating aliasing rules
decimal12_t decimal = { 12, 12 };
murmurhash3(decimal) in bloom filter: 2167721464
expect: 4203026776
---
be/src/exec/olap_scanner.cpp | 17 ++++++------
be/src/exprs/bloomfilter_predicate.h | 32 ++++++++++++++--------
.../segment_v2/bloom_filter_index_writer.cpp | 12 ++++----
3 files changed, 35 insertions(+), 26 deletions(-)
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 476f032..ebe2200 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -21,7 +21,9 @@
#include <string>
#include "gen_cpp/PaloInternalService_types.h"
+#include "olap/decimal12.h"
#include "olap/field.h"
+#include "olap/uint24.h"
#include "olap_scan_node.h"
#include "olap_utils.h"
#include "runtime/descriptors.h"
@@ -451,9 +453,10 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
}
case TYPE_DECIMALV2: {
DecimalV2Value* slot = tuple->get_decimalv2_slot(slot_desc->tuple_offset());
+ auto packed_decimal = *reinterpret_cast<const decimal12_t*>(ptr);
- int64_t int_value = *(int64_t*)(ptr);
- int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t));
+ int64_t int_value = packed_decimal.integer;
+ int32_t frac_value = packed_decimal.fraction;
if (!slot->from_olap_decimal(int_value, frac_value)) {
tuple->set_null(slot_desc->null_indicator_offset());
}
@@ -469,12 +472,10 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
}
case TYPE_DATE: {
DateTimeValue* slot = tuple->get_datetime_slot(slot_desc->tuple_offset());
- uint64_t value = 0;
- value = *(unsigned char*)(ptr + 2);
- value <<= 8;
- value |= *(unsigned char*)(ptr + 1);
- value <<= 8;
- value |= *(unsigned char*)(ptr);
+
+ uint24_t date = *reinterpret_cast<const uint24_t*>(ptr);
+ uint64_t value = uint32_t(date);
+
if (!slot->from_olap_date(value)) {
tuple->set_null(slot_desc->null_indicator_offset());
}
diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h
index 8fbb916..492c687 100644
--- a/be/src/exprs/bloomfilter_predicate.h
+++ b/be/src/exprs/bloomfilter_predicate.h
@@ -26,7 +26,9 @@
#include "exprs/block_bloom_filter.hpp"
#include "exprs/predicate.h"
#include "olap/bloom_filter.hpp"
+#include "olap/decimal12.h"
#include "olap/rowset/segment_v2/bloom_filter.h"
+#include "olap/uint24.h"
#include "runtime/mem_tracker.h"
#include "runtime/raw_value.h"
@@ -223,30 +225,38 @@ struct DateTimeFindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
}
};
+// avoid violating C/C++ aliasing rules.
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101684
+
template <class BloomFilterAdaptor>
struct DateFindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
- uint64_t value = 0;
- value = *(unsigned char*)((char*)data + 2);
- value <<= 8;
- value |= *(unsigned char*)((char*)data + 1);
- value <<= 8;
- value |= *(unsigned char*)((char*)data);
+ uint24_t date = *static_cast<const uint24_t*>(data);
+ uint64_t value = uint32_t(date);
+
DateTimeValue date_value;
date_value.from_olap_date(value);
date_value.to_datetime();
- return bloom_filter.test_bytes((char*)&date_value, sizeof(DateTimeValue));
+
+ char data_bytes[sizeof(date_value)];
+ memcpy(&data_bytes, &date_value, sizeof(date_value));
+ return bloom_filter.test_bytes(data_bytes, sizeof(DateTimeValue));
}
};
template <class BloomFilterAdaptor>
-struct DecimalV2FindOp : public CommonFindOp<DateTimeValue, BloomFilterAdaptor> {
+struct DecimalV2FindOp : public CommonFindOp<DecimalV2Value, BloomFilterAdaptor> {
bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
+ auto packed_decimal = *static_cast<const decimal12_t*>(data);
DecimalV2Value value;
- int64_t int_value = *(int64_t*)(data);
- int32_t frac_value = *(int32_t*)((char*)data + sizeof(int64_t));
+ int64_t int_value = packed_decimal.integer;
+ int32_t frac_value = packed_decimal.fraction;
value.from_olap_decimal(int_value, frac_value);
- return bloom_filter.test_bytes((char*)&value, sizeof(DecimalV2Value));
+
+ constexpr int decimal_value_sz = sizeof(DecimalV2Value);
+ char data_bytes[decimal_value_sz];
+ memcpy(&data_bytes, &value, decimal_value_sz);
+ return bloom_filter.test_bytes(data_bytes, decimal_value_sz);
}
};
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 30eaf80..4c031c9 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -48,14 +48,12 @@ struct BloomFilterTraits<Slice> {
};
struct Int128Comparator {
- bool operator()(const PackedInt128& a, const PackedInt128& b) const {
- return a.value < b.value;
- }
+ bool operator()(const int128_t& a, const int128_t& b) const { return a < b; }
};
template <>
struct BloomFilterTraits<int128_t> {
- using ValueDict = std::set<PackedInt128, Int128Comparator>;
+ using ValueDict = std::set<int128_t, Int128Comparator>;
};
// Builder for bloom filter. In doris, bloom filter index is used in
@@ -90,9 +88,9 @@ public:
_typeinfo->deep_copy(&new_value, v, &_pool);
_values.insert(new_value);
} else if constexpr (_is_int128()) {
- PackedInt128 new_value;
- memcpy(&new_value.value, v, sizeof(PackedInt128));
- _values.insert((*reinterpret_cast<CppType*>(&new_value)));
+ int128_t new_value;
+ memcpy(&new_value, v, sizeof(PackedInt128));
+ _values.insert(new_value);
} else {
_values.insert(*v);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org