You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/06/23 06:03:07 UTC
[doris] branch master updated: [improvement] each tuple starting at aligned address to build with ubsan enabled (#8831)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f466668d48 [improvement] each tuple starting at aligned address to build with ubsan enabled (#8831)
f466668d48 is described below
commit f466668d4892c00f0c02cee35a6765b5bd575d28
Author: Yongqiang YANG <98...@users.noreply.github.com>
AuthorDate: Thu Jun 23 14:03:01 2022 +0800
[improvement] each tuple starting at aligned address to build with ubsan enabled (#8831)
When I builded doris be with ubsan enabled and enabled vectorization,
be core dump at doris::DecimalV2Value::operator long(). It cored
because accessing on a non-aligned address by sse.
With ubsan enabled, compile generates different assemble code including
sse instruction.
A sender serializes tuples to a contiguous memory area, while a receiver
just copy it. So we should align each tuple offset to 16 bytes.
For compatibility, we should use a config to control it.
BTW: with tools like ubsan, asan, tsan we can find bugs more easily,
e.g. #8815. It is difficult to find the bug without ubsan.
Anyway, we should use modern tools to be more productive.
---
be/src/common/config.h | 1 +
be/src/runtime/row_batch.cpp | 20 +++++++++++++++++++-
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/be/src/common/config.h b/be/src/common/config.h
index fd7ebdd395..d3c189231b 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -144,6 +144,7 @@ CONF_String(doris_cgroups, "");
CONF_Int32(num_threads_per_core, "3");
// if true, compresses tuple data in Serialize
CONF_mBool(compress_rowbatches, "true");
+CONF_mBool(rowbatch_align_tuple_offset, "false");
// interval between profile reports; in seconds
CONF_mInt32(status_report_interval, "5");
// if true, each disk will have a separate thread pool for scanner
diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp
index 8627542a85..74a79a6db6 100644
--- a/be/src/runtime/row_batch.cpp
+++ b/be/src/runtime/row_batch.cpp
@@ -156,6 +156,10 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch)
for (auto slot : desc->string_slots()) {
DCHECK(slot->type().is_string_type());
+ if (tuple->is_null(slot->null_indicator_offset())) {
+ continue;
+ }
+
StringValue* string_val = tuple->get_string_slot(slot->tuple_offset());
int64_t offset = convert_to<int64_t>(string_val->ptr);
string_val->ptr = tuple_data + offset;
@@ -209,6 +213,14 @@ RowBatch::~RowBatch() {
clear();
}
+static inline size_t align_tuple_offset(size_t offset) {
+ if (config::rowbatch_align_tuple_offset) {
+ return (offset + alignof(std::max_align_t) - 1) & (~(alignof(std::max_align_t) - 1));
+ }
+
+ return offset;
+}
+
Status RowBatch::serialize(PRowBatch* output_batch, size_t* uncompressed_size,
size_t* compressed_size, bool allow_transfer_large_data) {
// num_rows
@@ -247,11 +259,16 @@ Status RowBatch::serialize(PRowBatch* output_batch, size_t* uncompressed_size,
mutable_new_tuple_offsets->Add(-1);
continue;
}
+
+ int64_t old_offset = offset;
+ offset = align_tuple_offset(offset);
+ tuple_data += offset - old_offset;
+
// Record offset before creating copy (which increments offset and tuple_data)
mutable_tuple_offsets->Add((int32_t)offset);
mutable_new_tuple_offsets->Add(offset);
row->get_tuple(j)->deep_copy(*desc, &tuple_data, &offset, /* convert_ptrs */ true);
- CHECK_LE(offset, tuple_byte_size);
+ CHECK_GE(offset, 0);
}
}
CHECK_EQ(offset, tuple_byte_size)
@@ -541,6 +558,7 @@ size_t RowBatch::total_byte_size() const {
if (tuple == nullptr) {
continue;
}
+ result = align_tuple_offset(result);
result += desc->byte_size();
for (auto slot : desc->string_slots()) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org