You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/04/13 16:41:57 UTC
[impala] 01/04: IMPALA-9428 Add arm64 atomic ops
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit c52b1d8a689584f5502822cecd54c363c768f71d
Author: zhaorenhai <zh...@hotmail.com>
AuthorDate: Wed Feb 26 10:53:58 2020 +0000
IMPALA-9428 Add arm64 atomic ops
Atomic ops are implemented by asm.
Different arc have diffent implementation.
Here add arm64 atomic ops implementation.
The file atomicops-internals-arm64.h is originally from here:
https://github.com/protocolbuffers/protobuf/blob/3.5.x/src/google/protobuf/stubs/atomicops_internals_arm64_gcc.h
and the commit hash is ec021f5.
And I changed some instructions to newer version base
the original file, such as ldxr + memory barrier was
changed to ldaxr which has memory barrier natively.
And also added some funtions which impala use.
Change-Id: I469e0169193ad6ad8acca2a800c8b3f043083ddd
---
be/src/common/init.cc | 2 +
be/src/gutil/atomicops-internals-arm64.h | 474 +++++++++++++++++++++++++++++++
be/src/gutil/atomicops.h | 2 +
be/src/gutil/cpu.cc | 3 +
4 files changed, 481 insertions(+)
diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index e8069ef..278a563 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -305,8 +305,10 @@ void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm,
// Breakpad needs flags and logging to initialize.
ABORT_IF_ERROR(RegisterMinidump(argv[0]));
#ifndef THREAD_SANITIZER
+#ifndef __aarch64__
AtomicOps_x86CPUFeaturesInit();
#endif
+#endif
impala::InitThreading();
impala::datetime_parse_util::SimpleDateFormatTokenizer::InitCtx();
impala::SeedOpenSSLRNG();
diff --git a/be/src/gutil/atomicops-internals-arm64.h b/be/src/gutil/atomicops-internals-arm64.h
new file mode 100644
index 0000000..5111ecc
--- /dev/null
+++ b/be/src/gutil/atomicops-internals-arm64.h
@@ -0,0 +1,474 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2012 Google Inc. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is an internal atomic implementation, use atomicops.h instead.
+
+#ifndef GUTIL_ATOMICOPS_INTERNALS_ARM64_H_
+#define GUTIL_ATOMICOPS_INTERNALS_ARM64_H_
+
+typedef int32_t Atomic32;
+typedef int64_t Atomic64;
+
+namespace base {
+namespace subtle {
+
+typedef int32_t Atomic32;
+typedef int64_t Atomic64;
+
+inline void MemoryBarrier() {
+ __asm__ __volatile__ ("dmb ish" ::: "memory"); // NOLINT
+}
+// NoBarrier versions of the operation include "memory" in the clobber list.
+// This is not required for direct usage of the NoBarrier versions of the
+// operations. However this is required for correctness when they are used as
+// part of the Acquire or Release versions, to ensure that nothing from outside
+// the call is reordered between the operation and the memory barrier. This does
+// not change the code generated, so has no or minimal impact on the
+// NoBarrier operations.
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+ Atomic32 old_value,
+ Atomic32 new_value) {
+ Atomic32 prev;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldxr %w[prev], %[ptr] \n\t" // Load the previous value.
+ "cmp %w[prev], %w[old_value] \n\t"
+ "bne 1f \n\t"
+ "stxr %w[temp], %w[new_value], %[ptr] \n\t" // Try to store the new value.
+ "cbnz %w[temp], 0b \n\t" // Retry if it did not work.
+ "1: \n\t"
+ : [prev]"=&r" (prev),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [old_value]"IJr" (old_value),
+ [new_value]"r" (new_value)
+ : "cc", "memory"
+ ); // NOLINT
+
+ return prev;
+}
+
+inline Atomic32 Barrier_CompareAndSwap(volatile Atomic32* ptr,
+ Atomic32 old_value,
+ Atomic32 new_value) {
+ Atomic32 prev;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldaxr %w[prev], %[ptr] \n\t" // Load the previous value.
+ "cmp %w[prev], %w[old_value] \n\t"
+ "bne 1f \n\t"
+ "stlxr %w[temp], %w[new_value], %[ptr] \n\t" // Try to store the new value.
+ "cbnz %w[temp], 0b \n\t" // Retry if it did not work.
+ "1: \n\t"
+ : [prev]"=&r" (prev),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [old_value]"IJr" (old_value),
+ [new_value]"r" (new_value)
+ : "cc", "memory"
+ ); // NOLINT
+
+ return prev;
+}
+
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ Atomic32 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldxr %w[result], %[ptr] \n\t" // Load the previous value.
+ "stxr %w[temp], %w[new_value], %[ptr] \n\t" // Try to store the new value.
+ "cbnz %w[temp], 0b \n\t" // Retry if it did not work.
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [new_value]"r" (new_value)
+ : "memory"
+ ); // NOLINT
+
+ return result;
+}
+
+inline Atomic32 Barrier_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ Atomic32 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldaxr %w[result], %[ptr] \n\t" // Load the previous value.
+ "stlxr %w[temp], %w[new_value], %[ptr] \n\t" // Try to store the new value.
+ "cbnz %w[temp], 0b \n\t" // Retry if it did not work.
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [new_value]"r" (new_value)
+ : "memory"
+ ); // NOLINT
+
+ return result;
+}
+
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+ MemoryBarrier();
+ return old_val;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ MemoryBarrier();
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldxr %w[result], %[ptr] \n\t" // Load the previous value.
+ "add %w[result], %w[result], %w[increment]\n\t"
+ "stxr %w[temp], %w[result], %[ptr] \n\t" // Try to store the result.
+ "cbnz %w[temp], 0b \n\t" // Retry on failure.
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [increment]"IJr" (increment)
+ : "memory"
+ ); // NOLINT
+
+ return result;
+}
+
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldaxr %w[result], %[ptr] \n\t" // Load the previous value.
+ "add %w[result], %w[result], %w[increment]\n\t"
+ "stlxr %w[temp], %w[result], %[ptr] \n\t" // Try to store the result.
+ "cbnz %w[temp], 0b \n\t" // Retry on failure.
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [increment]"IJr" (increment)
+ : "memory"
+ );
+ return result;
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+ Atomic32 old_value,
+ Atomic32 new_value) {
+ Atomic32 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+ MemoryBarrier();
+ return prev;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+ Atomic32 old_value,
+ Atomic32 new_value) {
+ MemoryBarrier();
+ Atomic32 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+ return prev;
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+ *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+ *ptr = value;
+ MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+ __asm__ __volatile__ ( // NOLINT
+ "stlr %w[value], %[ptr] \n\t"
+ : [ptr]"=Q" (*ptr)
+ : [value]"r" (value)
+ : "memory"
+ ); // NOLINT
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+ return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+ Atomic32 value;
+
+ __asm__ __volatile__ ( // NOLINT
+ "ldar %w[value], %[ptr] \n\t"
+ : [value]"=r" (value)
+ : [ptr]"Q" (*ptr)
+ : "memory"
+ ); // NOLINT
+
+ return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+ MemoryBarrier();
+ return *ptr;
+}
+
+// 64-bit versions of the operations.
+// See the 32-bit versions for comments.
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+ Atomic64 prev;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldxr %[prev], %[ptr] \n\t"
+ "cmp %[prev], %[old_value] \n\t"
+ "bne 1f \n\t"
+ "stxr %w[temp], %[new_value], %[ptr] \n\t"
+ "cbnz %w[temp], 0b \n\t"
+ "1: \n\t"
+ : [prev]"=&r" (prev),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [old_value]"IJr" (old_value),
+ [new_value]"r" (new_value)
+ : "cc", "memory"
+ ); // NOLINT
+
+ return prev;
+}
+
+
+inline Atomic64 Barrier_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+ Atomic64 prev;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldaxr %[prev], %[ptr] \n\t"
+ "cmp %[prev], %[old_value] \n\t"
+ "bne 1f \n\t"
+ "stlxr %w[temp], %[new_value], %[ptr] \n\t"
+ "cbnz %w[temp], 0b \n\t"
+ "1: \n\t"
+ : [prev]"=&r" (prev),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [old_value]"IJr" (old_value),
+ [new_value]"r" (new_value)
+ : "cc", "memory"
+ ); // NOLINT
+
+ return prev;
+}
+
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ Atomic64 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldxr %[result], %[ptr] \n\t"
+ "stxr %w[temp], %[new_value], %[ptr] \n\t"
+ "cbnz %w[temp], 0b \n\t"
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [new_value]"r" (new_value)
+ : "memory"
+ ); // NOLINT
+
+ return result;
+}
+
+inline Atomic64 Barrier_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ Atomic64 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldaxr %[result], %[ptr] \n\t"
+ "stlxr %w[temp], %[new_value], %[ptr] \n\t"
+ "cbnz %w[temp], 0b \n\t"
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [new_value]"r" (new_value)
+ : "memory"
+ ); // NOLINT
+
+ return result;
+}
+
+
+inline void PauseCPU() {
+ __asm__ __volatile__("yield" : : : "memory");
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+ MemoryBarrier();
+ return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ MemoryBarrier();
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ Atomic64 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldxr %[result], %[ptr] \n\t"
+ "add %[result], %[result], %[increment] \n\t"
+ "stxr %w[temp], %[result], %[ptr] \n\t"
+ "cbnz %w[temp], 0b \n\t"
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [increment]"IJr" (increment)
+ : "memory"
+ ); // NOLINT
+
+ return result;
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ Atomic64 result;
+ int32_t temp;
+
+ __asm__ __volatile__ ( // NOLINT
+ "0: \n\t"
+ "ldaxr %[result], %[ptr] \n\t"
+ "add %[result], %[result], %[increment] \n\t"
+ "stlxr %w[temp], %[result], %[ptr] \n\t"
+ "cbnz %w[temp], 0b \n\t"
+ : [result]"=&r" (result),
+ [temp]"=&r" (temp),
+ [ptr]"+Q" (*ptr)
+ : [increment]"IJr" (increment)
+ : "memory"
+ ); // NOLINT
+
+ return result;
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+ Atomic64 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+ MemoryBarrier();
+
+ return prev;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+ MemoryBarrier();
+ Atomic64 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+
+ return prev;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+ *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+ *ptr = value;
+ MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+ __asm__ __volatile__ ( // NOLINT
+ "stlr %x[value], %[ptr] \n\t"
+ : [ptr]"=Q" (*ptr)
+ : [value]"r" (value)
+ : "memory"
+ ); // NOLINT
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+ return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+ Atomic64 value;
+
+ __asm__ __volatile__ ( // NOLINT
+ "ldar %x[value], %[ptr] \n\t"
+ : [value]"=r" (value)
+ : [ptr]"Q" (*ptr)
+ : "memory"
+ ); // NOLINT
+
+ return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+ MemoryBarrier();
+ return *ptr;
+}
+
+} // namespace subtle
+} // namespace base
+
+#endif // GUTIL_ATOMICOPS_INTERNALS_ARM64_GCC_H_
+
diff --git a/be/src/gutil/atomicops.h b/be/src/gutil/atomicops.h
index ad8d773..b358d6e 100644
--- a/be/src/gutil/atomicops.h
+++ b/be/src/gutil/atomicops.h
@@ -74,6 +74,8 @@
#include "gutil/atomicops-internals-tsan.h"
#elif defined(__APPLE__)
#include "gutil/atomicops-internals-macosx.h"
+#elif defined(__GNUC__) && defined(__aarch64__)
+#include "gutil/atomicops-internals-arm64.h"
#elif defined(__GNUC__) && defined(ARMV6)
#include "gutil/atomicops-internals-arm-v6plus.h"
#elif defined(ARMV3)
diff --git a/be/src/gutil/cpu.cc b/be/src/gutil/cpu.cc
index 3ebfa4a..d59a19c 100644
--- a/be/src/gutil/cpu.cc
+++ b/be/src/gutil/cpu.cc
@@ -269,6 +269,9 @@ void CPU::Initialize() {
#elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
+#elif defined(__aarch64__)
+ cpu_brand_.assign("ARM64");
+ has_broken_neon_ = false;
#else
#error unknown architecture
#endif