You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/05/13 22:53:00 UTC
[4/4] incubator-impala git commit: IMPALA-2809: Improve scalar
ByteSwap().
IMPALA-2809: Improve scalar ByteSwap().
This patch improves our ByteSwap() function by handling
more byte sizes in the fast path, as opposed to the
loop-based slow path.
ByteSwap() is used heavily in when scanning Parquet decimals.
Before this patch, VTune showed ByteSwap() among the top
three worst cycle offenders when running TPCH-Q6 on my local
setup with a large lineitem table.
After this patch, ByteSwap() shows no significant contribution
to the overall cycles spent.
There was a measurable improvement of a few percent for TPCH-Q6.
Change-Id: I4f462e6bdb022db46b48889a6a7426120a80d9b4
Reviewed-on: http://gerrit.cloudera.org:8080/3033
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0306dd57
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0306dd57
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0306dd57
Branch: refs/heads/master
Commit: 0306dd576d05b6773e380d93af8b3dab90ecdd32
Parents: 46c3e43
Author: Youwei Wang <yo...@intel.com>
Authored: Wed May 11 15:27:27 2016 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Fri May 13 15:52:53 2016 -0700
----------------------------------------------------------------------
be/src/util/bit-util-test.cc | 24 ++++++-
be/src/util/bit-util.h | 40 +++---------
be/src/util/bit-util.inline.h | 130 +++++++++++++++++++++++++++++++++++++
be/src/util/decimal-util.h | 2 +-
4 files changed, 162 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0306dd57/be/src/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc
index f376737..e2bdd97 100644
--- a/be/src/util/bit-util-test.cc
+++ b/be/src/util/bit-util-test.cc
@@ -21,7 +21,7 @@
#include <gtest/gtest.h>
#include "gutil/bits.h"
-#include "util/bit-util.h"
+#include "util/bit-util.inline.h"
#include "util/cpu-info.h"
#include "common/names.h"
@@ -111,6 +111,28 @@ TEST(BitUtil, ByteSwap) {
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0)), 0);
EXPECT_EQ(BitUtil::ByteSwap(static_cast<uint16_t>(0x1122)), 0x2211);
+
+ // Test ByteSwap() with an input/output buffer, swapping up to 32 bytes.
+ int buf_size = 32;
+ uint8_t src_buf[buf_size];
+ for (int i = 0; i < buf_size; ++i) {
+ src_buf[i] = i;
+ }
+ uint8_t dst_buf[buf_size];
+ for (int i = 0; i < buf_size; ++i) {
+ // Init dst buffer and swap i bytes.
+ memset(dst_buf, 0, buf_size);
+ BitUtil::ByteSwap(dst_buf, src_buf, i);
+ // Validate the swap results.
+ for (int j = 0; j < i; ++j) {
+ EXPECT_EQ(dst_buf[j], i - j - 1);
+ EXPECT_EQ(dst_buf[j], src_buf[i - j - 1]);
+ }
+ // Check that the dst buffer is otherwise unmodified.
+ for (int j = i; j < buf_size; ++j) {
+ EXPECT_EQ(dst_buf[j], 0);
+ }
+ }
}
TEST(BitUtil, Log2) {
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0306dd57/be/src/util/bit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h
index eed6df3..cae8212 100644
--- a/be/src/util/bit-util.h
+++ b/be/src/util/bit-util.h
@@ -148,49 +148,25 @@ class BitUtil {
return __builtin_bswap64(value);
}
static inline uint64_t ByteSwap(uint64_t value) {
- return static_cast<uint64_t>(__builtin_bswap64(value));
+ return __builtin_bswap64(value);
}
static inline int32_t ByteSwap(int32_t value) {
return __builtin_bswap32(value);
}
static inline uint32_t ByteSwap(uint32_t value) {
- return static_cast<uint32_t>(__builtin_bswap32(value));
+ return __builtin_bswap32(value);
}
static inline int16_t ByteSwap(int16_t value) {
- return (((value >> 8) & 0xff) | ((value & 0xff) << 8));
+ return __builtin_bswap16(value);
}
static inline uint16_t ByteSwap(uint16_t value) {
- return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
+ return __builtin_bswap16(value);
}
- /// Write the swapped bytes into dst. Src and st cannot overlap.
- static inline void ByteSwap(void* dst, const void* src, int len) {
- switch (len) {
- case 1:
- *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
- return;
- case 2:
- *reinterpret_cast<int16_t*>(dst) =
- ByteSwap(*reinterpret_cast<const int16_t*>(src));
- return;
- case 4:
- *reinterpret_cast<int32_t*>(dst) =
- ByteSwap(*reinterpret_cast<const int32_t*>(src));
- return;
- case 8:
- *reinterpret_cast<int64_t*>(dst) =
- ByteSwap(*reinterpret_cast<const int64_t*>(src));
- return;
- default: break;
- }
-
- uint8_t* d = reinterpret_cast<uint8_t*>(dst);
- const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
- for (int i = 0; i < len; ++i) {
- d[i] = s[len - i - 1];
- }
-
- }
+ /// Write the swapped bytes into dest; source and dest must not overlap.
+ /// This function is optimized for len <= 16. It reverts to a slow loop-based
+ /// swap for len > 16.
+ static inline void ByteSwap(void* dest, const void* source, int len);
/// Converts to big endian format (if not already in big endian) from the
/// machine's native endian format.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0306dd57/be/src/util/bit-util.inline.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.inline.h b/be/src/util/bit-util.inline.h
new file mode 100644
index 0000000..33b44c6
--- /dev/null
+++ b/be/src/util/bit-util.inline.h
@@ -0,0 +1,130 @@
+// Copyright 2016 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef IMPALA_BIT_UTIL_INLINE_H
+#define IMPALA_BIT_UTIL_INLINE_H
+
+#include "util/bit-util.h"
+
+namespace impala {
+
+inline void BitUtil::ByteSwap(void* dest, const void* source, int len) {
+ uint8_t* dst = reinterpret_cast<uint8_t*>(dest);
+ const uint8_t* src = reinterpret_cast<const uint8_t*>(source);
+ switch (len) {
+ case 1:
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src);
+ return;
+ case 2:
+ *reinterpret_cast<uint16_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src));
+ return;
+ case 3:
+ *reinterpret_cast<uint16_t*>(dst + 1) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src));
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 2);
+ return;
+ case 4:
+ *reinterpret_cast<uint32_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src));
+ return;
+ case 5:
+ *reinterpret_cast<uint32_t*>(dst + 1) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src));
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 4);
+ return;
+ case 6:
+ *reinterpret_cast<uint32_t*>(dst + 2) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src));
+ *reinterpret_cast<uint16_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src + 4));
+ return;
+ case 7:
+ *reinterpret_cast<uint32_t*>(dst + 3) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src));
+ *reinterpret_cast<uint16_t*>(dst + 1) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src + 4));
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 6);
+ return;
+ case 8:
+ *reinterpret_cast<uint64_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ return;
+ case 9:
+ *reinterpret_cast<uint64_t*>(dst + 1) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 8);
+ return;
+ case 10:
+ *reinterpret_cast<uint64_t*>(dst + 2) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint16_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src + 8));
+ return;
+ case 11:
+ *reinterpret_cast<uint64_t*>(dst + 3) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint16_t*>(dst + 1) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src + 8));
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 10);
+ return;
+ case 12:
+ *reinterpret_cast<uint64_t*>(dst + 4) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint32_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8));
+ return;
+ case 13:
+ *reinterpret_cast<uint64_t*>(dst + 5) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint32_t*>(dst + 1) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8));
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 12);
+ return;
+ case 14:
+ *reinterpret_cast<uint64_t*>(dst + 6) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint32_t*>(dst + 2) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8));
+ *reinterpret_cast<uint16_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src + 12));
+ return;
+ case 15:
+ *reinterpret_cast<uint64_t*>(dst + 7) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint32_t*>(dst + 3) =
+ ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8));
+ *reinterpret_cast<uint16_t*>(dst + 1) =
+ ByteSwap(*reinterpret_cast<const uint16_t*>(src + 12));
+ *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 14);
+ return;
+ case 16:
+ *reinterpret_cast<uint64_t*>(dst + 8) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src));
+ *reinterpret_cast<uint64_t*>(dst) =
+ ByteSwap(*reinterpret_cast<const uint64_t*>(src + 8));
+ return;
+ default:
+ // Revert to slow loop-based swap.
+ for (int i = 0; i < len; ++i) {
+ dst[i] = src[len - i - 1];
+ }
+ return;
+ }
+}
+
+}
+
+#endif
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0306dd57/be/src/util/decimal-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/decimal-util.h b/be/src/util/decimal-util.h
index 79b7130..2ce69bc 100644
--- a/be/src/util/decimal-util.h
+++ b/be/src/util/decimal-util.h
@@ -22,7 +22,7 @@
#include "runtime/multi-precision.h"
#include "runtime/types.h"
-#include "util/bit-util.h"
+#include "util/bit-util.inline.h"
namespace impala {