Posted to commits@impala.apache.org by ta...@apache.org on 2020/04/13 16:41:57 UTC

[impala] 01/04: IMPALA-9428 Add arm64 atomic ops

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c52b1d8a689584f5502822cecd54c363c768f71d
Author: zhaorenhai <zh...@hotmail.com>
AuthorDate: Wed Feb 26 10:53:58 2020 +0000

    IMPALA-9428 Add arm64 atomic ops
    
    Atomic ops are implemented in assembly, and each architecture
    needs its own implementation. This adds the arm64 implementation
    of the atomic ops.
    The file atomicops-internals-arm64.h originally comes from
    https://github.com/protocolbuffers/protobuf/blob/3.5.x/src/google/protobuf/stubs/atomicops_internals_arm64_gcc.h
    at commit hash ec021f5.
    Relative to the original file, some instructions were updated to
    newer variants: for example, ldxr followed by an explicit memory
    barrier was changed to ldaxr, which has acquire (barrier) semantics
    built in. Some functions that Impala uses were also added.
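    
    For illustration only, a minimal sketch of that instruction change
    (the w0/x1 register names are hypothetical, not taken from the patch):
    
        ldxr  w0, [x1]   // before: relaxed exclusive load ...
        dmb   ish        // ... plus a separate full memory barrier
        ldaxr w0, [x1]   // after: exclusive load-acquire, barrier built in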
    
    Change-Id: I469e0169193ad6ad8acca2a800c8b3f043083ddd
---
 be/src/common/init.cc                    |   2 +
 be/src/gutil/atomicops-internals-arm64.h | 488 +++++++++++++++++++++++++++++++
 be/src/gutil/atomicops.h                 |   2 +
 be/src/gutil/cpu.cc                      |   3 +
 4 files changed, 495 insertions(+)

diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index e8069ef..278a563 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -305,8 +305,10 @@ void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm,
   // Breakpad needs flags and logging to initialize.
   ABORT_IF_ERROR(RegisterMinidump(argv[0]));
 #ifndef THREAD_SANITIZER
+#ifndef __aarch64__
   AtomicOps_x86CPUFeaturesInit();
 #endif
+#endif
   impala::InitThreading();
   impala::datetime_parse_util::SimpleDateFormatTokenizer::InitCtx();
   impala::SeedOpenSSLRNG();
diff --git a/be/src/gutil/atomicops-internals-arm64.h b/be/src/gutil/atomicops-internals-arm64.h
new file mode 100644
index 0000000..5111ecc
--- /dev/null
+++ b/be/src/gutil/atomicops-internals-arm64.h
@@ -0,0 +1,488 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2012 Google Inc.  All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is an internal atomic implementation, use atomicops.h instead.
+
+#ifndef GUTIL_ATOMICOPS_INTERNALS_ARM64_H_
+#define GUTIL_ATOMICOPS_INTERNALS_ARM64_H_
+
+typedef int32_t Atomic32;
+typedef int64_t Atomic64;
+
+namespace base {
+namespace subtle {
+
+typedef int32_t Atomic32;
+typedef int64_t Atomic64;
+
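+// Full (two-way) memory barrier: "dmb ish" orders all memory accesses
+// before it against all memory accesses after it (inner shareable domain).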
+inline void MemoryBarrier() {
+  __asm__ __volatile__ ("dmb ish" ::: "memory");  // NOLINT
+}
+
+// NoBarrier versions of the operation include "memory" in the clobber list.
+// This is not required for direct usage of the NoBarrier versions of the
+// operations. However, it is required for correctness when they are used as
+// part of the Acquire or Release versions, to ensure that nothing from
+// outside the call is reordered between the operation and the memory
+// barrier. This does not change the generated code, so it has little or no
+// impact on the NoBarrier operations.
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldxr %w[prev], %[ptr]                 \n\t"  // Load the previous value.
+    "cmp %w[prev], %w[old_value]           \n\t"
+    "bne 1f                                \n\t"
+    "stxr %w[temp], %w[new_value], %[ptr]  \n\t"  // Try to store the new value.
+    "cbnz %w[temp], 0b                     \n\t"  // Retry if it did not work.
+    "1:                                    \n\t"
+    : [prev]"=&r" (prev),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [old_value]"IJr" (old_value),
+      [new_value]"r" (new_value)
+    : "cc", "memory"
+  );  // NOLINT
+
+  return prev;
+}
+
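+// As above, but the exclusive pair uses acquire (ldaxr) and release (stlxr)
+// semantics, so no separate barrier instruction is needed.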
+inline Atomic32 Barrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldaxr %w[prev], %[ptr]                 \n\t"  // Load the previous value.
+    "cmp %w[prev], %w[old_value]           \n\t"
+    "bne 1f                                \n\t"
+    "stlxr %w[temp], %w[new_value], %[ptr]  \n\t"  // Try to store the new value.
+    "cbnz %w[temp], 0b                     \n\t"  // Retry if it did not work.
+    "1:                                    \n\t"
+    : [prev]"=&r" (prev),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [old_value]"IJr" (old_value),
+      [new_value]"r" (new_value)
+    : "cc", "memory"
+  );  // NOLINT
+
+  return prev;
+}
+
+
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  Atomic32 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldxr %w[result], %[ptr]               \n\t"  // Load the previous value.
+    "stxr %w[temp], %w[new_value], %[ptr]  \n\t"  // Try to store the new value.
+    "cbnz %w[temp], 0b                     \n\t"  // Retry if it did not work.
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [new_value]"r" (new_value)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+inline Atomic32 Barrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  Atomic32 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldaxr %w[result], %[ptr]               \n\t"  // Load the previous value.
+    "stlxr %w[temp], %w[new_value], %[ptr]  \n\t"  // Try to store the new value.
+    "cbnz %w[temp], 0b                     \n\t"  // Retry if it did not work.
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [new_value]"r" (new_value)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+
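+// Composed variants: perform the relaxed operation and issue an explicit
+// full barrier after it (Acquire) or before it (Release).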
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+  MemoryBarrier();
+  return old_val;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  MemoryBarrier();
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+                                          Atomic32 increment) {
+  Atomic32 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                       \n\t"
+    "ldxr %w[result], %[ptr]                  \n\t"  // Load the previous value.
+    "add %w[result], %w[result], %w[increment]\n\t"
+    "stxr %w[temp], %w[result], %[ptr]        \n\t"  // Try to store the result.
+    "cbnz %w[temp], 0b                        \n\t"  // Retry on failure.
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [increment]"IJr" (increment)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+                                        Atomic32 increment) {
+  Atomic32 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                       \n\t"
+    "ldaxr %w[result], %[ptr]                  \n\t"  // Load the previous value.
+    "add %w[result], %w[result], %w[increment]\n\t"
+    "stlxr %w[temp], %w[result], %[ptr]        \n\t"  // Try to store the result.
+    "cbnz %w[temp], 0b                        \n\t"  // Retry on failure.
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [increment]"IJr" (increment)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  MemoryBarrier();
+  return prev;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  MemoryBarrier();
+  Atomic32 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  return prev;
+}
+
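+// Plain stores and loads: the NoBarrier versions imply no ordering;
+// Acquire_Store and Release_Load pair the plain access with a full barrier.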
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
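+// Store-release: "stlr" makes all prior accesses visible before the store;
+// its load-acquire counterpart is the "ldar" in Acquire_Load below.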
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  __asm__ __volatile__ (  // NOLINT
+    "stlr %w[value], %[ptr]  \n\t"
+    : [ptr]"=Q" (*ptr)
+    : [value]"r" (value)
+    : "memory"
+  );  // NOLINT
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value;
+
+  __asm__ __volatile__ (  // NOLINT
+    "ldar %w[value], %[ptr]  \n\t"
+    : [value]"=r" (value)
+    : [ptr]"Q" (*ptr)
+    : "memory"
+  );  // NOLINT
+
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+// 64-bit versions of the operations.
+// See the 32-bit versions for comments.
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldxr %[prev], %[ptr]                  \n\t"
+    "cmp %[prev], %[old_value]             \n\t"
+    "bne 1f                                \n\t"
+    "stxr %w[temp], %[new_value], %[ptr]   \n\t"
+    "cbnz %w[temp], 0b                     \n\t"
+    "1:                                    \n\t"
+    : [prev]"=&r" (prev),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [old_value]"IJr" (old_value),
+      [new_value]"r" (new_value)
+    : "cc", "memory"
+  );  // NOLINT
+
+  return prev;
+}
+
+
+inline Atomic64 Barrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldaxr %[prev], %[ptr]                  \n\t"
+    "cmp %[prev], %[old_value]             \n\t"
+    "bne 1f                                \n\t"
+    "stlxr %w[temp], %[new_value], %[ptr]   \n\t"
+    "cbnz %w[temp], 0b                     \n\t"
+    "1:                                    \n\t"
+    : [prev]"=&r" (prev),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [old_value]"IJr" (old_value),
+      [new_value]"r" (new_value)
+    : "cc", "memory"
+  );  // NOLINT
+
+  return prev;
+}
+
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  Atomic64 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldxr %[result], %[ptr]                \n\t"
+    "stxr %w[temp], %[new_value], %[ptr]   \n\t"
+    "cbnz %w[temp], 0b                     \n\t"
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [new_value]"r" (new_value)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+inline Atomic64 Barrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  Atomic64 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                    \n\t"
+    "ldaxr %[result], %[ptr]                \n\t"
+    "stlxr %w[temp], %[new_value], %[ptr]   \n\t"
+    "cbnz %w[temp], 0b                     \n\t"
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [new_value]"r" (new_value)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+
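+// Spin-wait hint: "yield" tells the core that this thread is busy-waiting,
+// so SMT or virtualized sibling threads can make progress.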
+inline void PauseCPU() {
+  __asm__ __volatile__("yield" : : : "memory");
+}
+
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+  MemoryBarrier();
+  return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+                                       Atomic64 new_value) {
+  MemoryBarrier();
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+                                          Atomic64 increment) {
+  Atomic64 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                     \n\t"
+    "ldxr %[result], %[ptr]                 \n\t"
+    "add %[result], %[result], %[increment] \n\t"
+    "stxr %w[temp], %[result], %[ptr]       \n\t"
+    "cbnz %w[temp], 0b                      \n\t"
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [increment]"IJr" (increment)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+                                        Atomic64 increment) {
+  Atomic64 result;
+  int32_t temp;
+
+  __asm__ __volatile__ (  // NOLINT
+    "0:                                     \n\t"
+    "ldaxr %[result], %[ptr]                 \n\t"
+    "add %[result], %[result], %[increment] \n\t"
+    "stlxr %w[temp], %[result], %[ptr]       \n\t"
+    "cbnz %w[temp], 0b                      \n\t"
+    : [result]"=&r" (result),
+      [temp]"=&r" (temp),
+      [ptr]"+Q" (*ptr)
+    : [increment]"IJr" (increment)
+    : "memory"
+  );  // NOLINT
+
+  return result;
+}
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  MemoryBarrier();
+
+  return prev;
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  MemoryBarrier();
+  Atomic64 prev = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+
+  return prev;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  __asm__ __volatile__ (  // NOLINT
+    "stlr %x[value], %[ptr]  \n\t"
+    : [ptr]"=Q" (*ptr)
+    : [value]"r" (value)
+    : "memory"
+  );  // NOLINT
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value;
+
+  __asm__ __volatile__ (  // NOLINT
+    "ldar %x[value], %[ptr]  \n\t"
+    : [value]"=r" (value)
+    : [ptr]"Q" (*ptr)
+    : "memory"
+  );  // NOLINT
+
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+}  // namespace subtle
+}  // namespace base
+
+#endif  // GUTIL_ATOMICOPS_INTERNALS_ARM64_H_
+
diff --git a/be/src/gutil/atomicops.h b/be/src/gutil/atomicops.h
index ad8d773..b358d6e 100644
--- a/be/src/gutil/atomicops.h
+++ b/be/src/gutil/atomicops.h
@@ -74,6 +74,8 @@
 #include "gutil/atomicops-internals-tsan.h"
 #elif defined(__APPLE__)
 #include "gutil/atomicops-internals-macosx.h"
+#elif defined(__GNUC__) && defined(__aarch64__)
+#include "gutil/atomicops-internals-arm64.h"
 #elif defined(__GNUC__) && defined(ARMV6)
 #include "gutil/atomicops-internals-arm-v6plus.h"
 #elif defined(ARMV3)
diff --git a/be/src/gutil/cpu.cc b/be/src/gutil/cpu.cc
index 3ebfa4a..d59a19c 100644
--- a/be/src/gutil/cpu.cc
+++ b/be/src/gutil/cpu.cc
@@ -269,6 +269,9 @@ void CPU::Initialize() {
 #elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
   cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
   has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
+#elif defined(__aarch64__)
+  cpu_brand_.assign("ARM64");
+  has_broken_neon_ = false;
 #else
   #error unknown architecture
 #endif