Posted to commits@apr.apache.org by yl...@apache.org on 2023/03/02 22:36:43 UTC

svn commit: r1908002 - in /apr/apr/branches/1.7.x: ./ atomic/unix/builtins.c atomic/unix/builtins64.c atomic/unix/mutex64.c configure.in test/testatomic.c

Author: ylavic
Date: Thu Mar  2 22:36:42 2023
New Revision: 1908002

URL: http://svn.apache.org/viewvc?rev=1908002&view=rev
Log:
apr_atomic: Generic apr_atomic_read64() needs a mutex on 32bit systems (tearing).

A 64bit load on a 32bit CPU/system uses two instructions (tearing), so ensure
atomicity with regard to other atomic functions by using the (same) lock.

test_atomics_threaded64() fails because of this on 32bit systems. PR 66457.

* atomic/unix/mutex64.c(apr_atomic_read64):
  Use locking when APR_SIZEOF_VOIDP < 8.
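
As an illustration of the tearing problem (a hypothetical sketch, not part of
this commit): on a 32bit target the compiler typically splits a plain 64bit
load into two 32bit loads, so a reader can observe one half of an old value
and one half of a new one.

    #include <stdint.h>

    /* Hypothetical torn read on a 32bit, little-endian target: the two
     * halves are loaded separately, and a concurrent 64bit store may land
     * in between, yielding a value that was never actually stored. */
    static uint64_t torn_read64(volatile uint64_t *mem)
    {
        volatile uint32_t *halves = (volatile uint32_t *)mem;
        uint32_t lo = halves[0];  /* first 32bit load */
        uint32_t hi = halves[1];  /* second 32bit load, possibly after a store */
        return ((uint64_t)hi << 32) | lo;
    }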


atomic64: Generic apr_atomic_read64() to always use a lock.

Don't play games with sizeof(void*) to determine whether a raw load
instruction is atomic or not. Systems that fall back to the generic
implementation already don't qualify for the compiler builtins or native
CPU atomic instructions, and we don't want to reimplement that logic here
(e.g. alignment, ordering guarantees, ...).

* atomic/unix/mutex64.c(apr_atomic_read64):
  No #ifdefery, always take the lock.

Follow up to r1907541.
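
A minimal sketch of the resulting pattern, using a plain pthread mutex instead
of APR's internal DECLARE_MUTEX_LOCKED()/MUTEX_UNLOCK() macros (the real
change is in the mutex64.c hunk below):

    #include <stdint.h>
    #include <pthread.h>

    static pthread_mutex_t atomic64_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Generic 64bit read: take the same lock as the other generic 64bit
     * operations so the load cannot interleave with a locked update. */
    static uint64_t generic_read64(volatile uint64_t *mem)
    {
        uint64_t cur;
        pthread_mutex_lock(&atomic64_lock);
        cur = *mem;
        pthread_mutex_unlock(&atomic64_lock);
        return cur;
    }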


atomic: No raw 64bit load/store on 32bit systems or anything but x86_64 or s390x.

Raw 64bit loads and stores need two instructions on 32bit systems (tearing),
so they are not atomic, and only x86(_64) and s390(x) have strong memory
ordering guarantees. Always use builtin functions for the cases where raw
load/store don't work as expected.

* atomic/unix/builtins.c, atomic/unix/builtins64.c:
  Use an accept-list rather than a reject-list to define WEAK_MEMORY_ORDERING.
  Test APR_SIZEOF_VOIDP < 8 to force usage of __sync builtins for _read{32,64}
  and _set{32,64} on 32bit systems when the __atomic_{load,store} builtins are
  not available.
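
When only the legacy __sync builtins are usable, a 64bit load and store can
still be made atomic without a raw memory access; this is the pattern the
builtins64.c hunk below falls back to (sketched here with plain uint64_t
rather than apr_uint64_t):

    #include <stdint.h>

    /* Atomic load: there is no __sync load primitive, so atomically add 0
     * and return the previous value. */
    static uint64_t sync_load64(volatile uint64_t *mem)
    {
        return __sync_fetch_and_add(mem, 0);
    }

    /* Atomic store: full barrier, then an atomic exchange whose result is
     * discarded (there is no __sync store primitive either). */
    static void sync_store64(volatile uint64_t *mem, uint64_t val)
    {
        __sync_synchronize();
        __sync_lock_test_and_set(mem, val);
    }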


configure: Test apr_uint64_t alignment for 64bit atomic builtins usability.

On some systems the __atomic builtins may be available only through libatomic,
or may fall back to libatomic when the atomic operations are not issued on a
suitably aligned address (e.g. 64bit atomics only on 8-byte aligned
addresses).

Modify the tests for HAVE_ATOMIC_BUILTINS64 and HAVE__ATOMIC_BUILTINS64 such
that the address used for the atomic operations is not aligned (unless 64bit
ints always have the suitable alignment, i.e. mainly on 64bit systems).
Also, use the __atomic_always_lock_free() builtin to fail the test when the
compiler already knows about the alignment issue (falling back to libatomic,
which we don't require/want).

With this, 64bit builtins should be selected only for platforms that can
natively handle atomics on any apr_uint64_t (since APR has no dedicated
8-byte aligned 64bit type for now), while the generic/mutex implementation
is used for the others.
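
Condensed, the new probe amounts to something like the following (a sketch;
the actual AC_TRY_RUN programs are in the configure.in hunk below):

    #include <stdint.h>

    int main(void)
    {
        /* Place the 64bit value behind a char so it is not necessarily
         * 8-byte aligned (unless the ABI realigns the member anyway). */
        struct {
            char pad0;
            uint64_t val;
        } s;

        /* Reject the builtins if atomics on such an address are not always
         * lock-free, i.e. if the compiler would fall back to libatomic. */
        if (!__atomic_always_lock_free(sizeof(s.val), &s.val))
            return 1;

        s.val = 1010;
        if (__atomic_fetch_add(&s.val, 1010, __ATOMIC_SEQ_CST) != 1010
            || s.val != 2020)
            return 1;

        return 0;
    }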


testatomic: initialize the globals used by a test within the test itself.

Just in case the test is later reordered (e.g. test_atomics_threaded64
and test_atomics_threaded_setread64 use the same atomic_ops64 variable).


atomic: test 4-byte aligned and/or cross-cacheline atomics (on 32bit systems).
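
The cross-cacheline case relies on a padded struct like the one added to
testatomic.c below: with a 64-byte cache line and a 59-byte pad, the 64bit
counter typically lands at offset 60 on a 32bit ABI (where it is only 4-byte
aligned) and therefore straddles two cache lines. A small sketch to check the
resulting layout (hypothetical, not part of the test):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CACHELINE_SIZE 64

    struct padded {
        char pad[CACHELINE_SIZE - 5];
        uint64_t ops64;
    } __attribute__((aligned(CACHELINE_SIZE)));

    int main(void)
    {
        /* Typically prints 60 on i386 (crossing the 64-byte boundary) and
         * 64 on 64bit ABIs (where the member is realigned). */
        printf("ops64 offset: %zu\n", offsetof(struct padded, ops64));
        return 0;
    }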


Merges r1907541, r1907678, r1907637, r1907642, r1907677, r1907985 from trunk.
Merges r1907679, r1907998 from 1.8.x.
Submitted by: ylavic

Modified:
    apr/apr/branches/1.7.x/   (props changed)
    apr/apr/branches/1.7.x/atomic/unix/builtins.c
    apr/apr/branches/1.7.x/atomic/unix/builtins64.c
    apr/apr/branches/1.7.x/atomic/unix/mutex64.c
    apr/apr/branches/1.7.x/configure.in
    apr/apr/branches/1.7.x/test/testatomic.c

Propchange: apr/apr/branches/1.7.x/
------------------------------------------------------------------------------
  Merged /apr/apr/branches/1.8.x:r1907679,1907998
  Merged /apr/apr/trunk:r1907541,1907637,1907642,1907677-1907678,1907985

Modified: apr/apr/branches/1.7.x/atomic/unix/builtins.c
URL: http://svn.apache.org/viewvc/apr/apr/branches/1.7.x/atomic/unix/builtins.c?rev=1908002&r1=1908001&r2=1908002&view=diff
==============================================================================
--- apr/apr/branches/1.7.x/atomic/unix/builtins.c (original)
+++ apr/apr/branches/1.7.x/atomic/unix/builtins.c Thu Mar  2 22:36:42 2023
@@ -18,10 +18,11 @@
 
 #ifdef USE_ATOMICS_BUILTINS
 
-#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
-#define WEAK_MEMORY_ORDERING 1
-#else
+#if defined(__i386__) || defined(__x86_64__) \
+    || defined(__s390__) || defined(__s390x__)
 #define WEAK_MEMORY_ORDERING 0
+#else
+#define WEAK_MEMORY_ORDERING 1
 #endif
 
 APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p)

Modified: apr/apr/branches/1.7.x/atomic/unix/builtins64.c
URL: http://svn.apache.org/viewvc/apr/apr/branches/1.7.x/atomic/unix/builtins64.c?rev=1908002&r1=1908001&r2=1908002&view=diff
==============================================================================
--- apr/apr/branches/1.7.x/atomic/unix/builtins64.c (original)
+++ apr/apr/branches/1.7.x/atomic/unix/builtins64.c Thu Mar  2 22:36:42 2023
@@ -18,17 +18,18 @@
 
 #ifdef USE_ATOMICS_BUILTINS64
 
-#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
-#define WEAK_MEMORY_ORDERING 1
-#else
+#if defined(__i386__) || defined(__x86_64__) \
+    || defined(__s390__) || defined(__s390x__)
 #define WEAK_MEMORY_ORDERING 0
+#else
+#define WEAK_MEMORY_ORDERING 1
 #endif
 
 APR_DECLARE(apr_uint64_t) apr_atomic_read64(volatile apr_uint64_t *mem)
 {
 #if HAVE__ATOMIC_BUILTINS
     return __atomic_load_n(mem, __ATOMIC_SEQ_CST);
-#elif WEAK_MEMORY_ORDERING
+#elif WEAK_MEMORY_ORDERING || APR_SIZEOF_VOIDP < 8
     /* No __sync_load() available => apr_atomic_add64(mem, 0) */
     return __sync_fetch_and_add(mem, 0);
 #else
@@ -40,7 +41,7 @@ APR_DECLARE(void) apr_atomic_set64(volat
 {
 #if HAVE__ATOMIC_BUILTINS
     __atomic_store_n(mem, val, __ATOMIC_SEQ_CST);
-#elif WEAK_MEMORY_ORDERING
+#elif WEAK_MEMORY_ORDERING || APR_SIZEOF_VOIDP < 8
     /* No __sync_store() available => apr_atomic_xchg64(mem, val) */
     __sync_synchronize();
     __sync_lock_test_and_set(mem, val);

Modified: apr/apr/branches/1.7.x/atomic/unix/mutex64.c
URL: http://svn.apache.org/viewvc/apr/apr/branches/1.7.x/atomic/unix/mutex64.c?rev=1908002&r1=1908001&r2=1908002&view=diff
==============================================================================
--- apr/apr/branches/1.7.x/atomic/unix/mutex64.c (original)
+++ apr/apr/branches/1.7.x/atomic/unix/mutex64.c Thu Mar  2 22:36:42 2023
@@ -96,7 +96,14 @@ apr_status_t apr__atomic_generic64_init(
 
 APR_DECLARE(apr_uint64_t) apr_atomic_read64(volatile apr_uint64_t *mem)
 {
-    return *mem;
+    apr_uint64_t cur_value;
+    DECLARE_MUTEX_LOCKED(mutex, mem);
+
+    cur_value = *mem;
+
+    MUTEX_UNLOCK(mutex);
+
+    return cur_value;
 }
 
 APR_DECLARE(void) apr_atomic_set64(volatile apr_uint64_t *mem, apr_uint64_t val)

Modified: apr/apr/branches/1.7.x/configure.in
URL: http://svn.apache.org/viewvc/apr/apr/branches/1.7.x/configure.in?rev=1908002&r1=1908001&r2=1908002&view=diff
==============================================================================
--- apr/apr/branches/1.7.x/configure.in (original)
+++ apr/apr/branches/1.7.x/configure.in Thu Mar  2 22:36:42 2023
@@ -552,31 +552,35 @@ AC_CACHE_CHECK([whether the compiler pro
 [AC_TRY_RUN([
 #if HAVE_STDINT_H
 #include <stdint.h>
+typedef uint64_t u64_t;
+#else
+typedef unsigned long long u64_t;
 #endif
 int main(int argc, const char *const *argv)
 {
-#if HAVE_STDINT_H
-    uint64_t val = 1010, tmp, *mem = &val;
-#else
-    unsigned long long val = 1010, tmp, *mem = &val;
-#endif
+    struct {
+        char pad0;
+        u64_t val;
+    } s;
+    u64_t *mem = &s.val, tmp;
 
-    if (__sync_fetch_and_add(&val, 1010) != 1010 || val != 2020)
+    s.val = 1010;
+    if (__sync_fetch_and_add(&s.val, 1010) != 1010 || s.val != 2020)
         return 1;
 
-    tmp = val;
-    if (__sync_fetch_and_sub(mem, 1010) != tmp || val != 1010)
+    tmp = s.val;
+    if (__sync_fetch_and_sub(mem, 1010) != tmp || s.val != 1010)
         return 1;
 
-    if (__sync_sub_and_fetch(&val, 1010) != 0 || val != 0)
+    if (__sync_sub_and_fetch(&s.val, 1010) != 0 || s.val != 0)
         return 1;
 
     tmp = 3030;
-    if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || val != tmp)
+    if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || s.val != tmp)
         return 1;
 
     __sync_synchronize();
-    if (__sync_lock_test_and_set(&val, 4040) != 3030)
+    if (__sync_lock_test_and_set(&s.val, 4040) != 3030)
         return 1;
 
     return 0;
@@ -586,31 +590,45 @@ AC_CACHE_CHECK([whether the compiler pro
 [AC_TRY_RUN([
 #if HAVE_STDINT_H
 #include <stdint.h>
+typedef uint64_t u64_t;
+#else
+typedef unsigned long long u64_t;
 #endif
+static int test_always_lock_free(volatile u64_t *val)
+{
+    return __atomic_always_lock_free(sizeof(*val), val);
+}
 int main(int argc, const char *const *argv)
 {
-#if HAVE_STDINT_H
-    uint64_t val = 1010, tmp, *mem = &val;
-#else
-    unsigned long long val = 1010, tmp, *mem = &val;
-#endif
+    struct {
+        char pad0;
+        u64_t val;
+        char pad1;
+        u64_t tmp;
+    } s;
+    u64_t *mem = &s.val;
+
+    /* check if alignment matters (no fallback to libatomic) */
+    if (!test_always_lock_free(&s.val))
+        return 1;
 
-    if (__atomic_fetch_add(&val, 1010, __ATOMIC_SEQ_CST) != 1010 || val != 2020)
+    s.val = 1010;
+    if (__atomic_fetch_add(&s.val, 1010, __ATOMIC_SEQ_CST) != 1010 || s.val != 2020)
         return 1;
 
-    tmp = val;
-    if (__atomic_fetch_sub(mem, 1010, __ATOMIC_SEQ_CST) != tmp || val != 1010)
+    s.tmp = s.val;
+    if (__atomic_fetch_sub(mem, 1010, __ATOMIC_SEQ_CST) != s.tmp || s.val != 1010)
         return 1;
 
-    if (__atomic_sub_fetch(&val, 1010, __ATOMIC_SEQ_CST) != 0 || val != 0)
+    if (__atomic_sub_fetch(&s.val, 1010, __ATOMIC_SEQ_CST) != 0 || s.val != 0)
         return 1;
 
-    tmp = val;
-    if (!__atomic_compare_exchange_n(mem, &tmp, 3030, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-            || tmp != 0)
+    s.tmp = s.val;
+    if (!__atomic_compare_exchange_n(mem, &s.tmp, 3030, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+            || s.tmp != 0)
         return 1;
 
-    if (__atomic_exchange_n(&val, 4040, __ATOMIC_SEQ_CST) != 3030)
+    if (__atomic_exchange_n(&s.val, 4040, __ATOMIC_SEQ_CST) != 3030)
         return 1;
 
     return 0;

Modified: apr/apr/branches/1.7.x/test/testatomic.c
URL: http://svn.apache.org/viewvc/apr/apr/branches/1.7.x/test/testatomic.c?rev=1908002&r1=1908001&r2=1908002&view=diff
==============================================================================
--- apr/apr/branches/1.7.x/test/testatomic.c (original)
+++ apr/apr/branches/1.7.x/test/testatomic.c Thu Mar  2 22:36:42 2023
@@ -356,10 +356,18 @@ void *APR_THREAD_FUNC thread_func_atomic
 
 apr_thread_mutex_t *thread_lock;
 apr_thread_mutex_t *thread_lock64;
-volatile apr_uint32_t mutex_locks = 0;
-volatile apr_uint64_t mutex_locks64 = 0;
-volatile apr_uint32_t atomic_ops = 0;
-volatile apr_uint64_t atomic_ops64 = 0;
+apr_uint32_t mutex_locks = 0;
+apr_uint64_t mutex_locks64 = 0;
+apr_uint32_t atomic_ops = 0;
+apr_uint64_t atomic_ops64 = 0;
+#ifndef CACHELINE_SIZE
+#define CACHELINE_SIZE 64
+#endif
+struct {
+    char pad[CACHELINE_SIZE - 5];
+    apr_uint64_t ops64;
+} atomic_pad __attribute__((aligned(CACHELINE_SIZE)));
+
 apr_status_t exit_ret_val = 123; /* just some made up number to check on later */
 
 #define NUM_THREADS 40
@@ -639,13 +647,14 @@ void *APR_THREAD_FUNC thread_func_mutex6
 
 void *APR_THREAD_FUNC thread_func_atomic64(apr_thread_t *thd, void *data)
 {
+    apr_uint64_t *ops64 = data;
     int i;
 
     for (i = 0; i < NUM_ITERATIONS ; i++) {
-        apr_atomic_inc64(&atomic_ops64);
-        apr_atomic_add64(&atomic_ops64, 2);
-        apr_atomic_dec64(&atomic_ops64);
-        apr_atomic_dec64(&atomic_ops64);
+        apr_atomic_inc64(ops64);
+        apr_atomic_add64(ops64, 2);
+        apr_atomic_dec64(ops64);
+        apr_atomic_dec64(ops64);
     }
     apr_thread_exit(thd, exit_ret_val);
     return NULL;
@@ -653,6 +662,7 @@ void *APR_THREAD_FUNC thread_func_atomic
 
 static void test_atomics_threaded64(abts_case *tc, void *data)
 {
+    apr_uint64_t *ops64 = data;
     apr_thread_t *t1[NUM_THREADS];
     apr_thread_t *t2[NUM_THREADS];
     apr_status_t rv;
@@ -662,13 +672,16 @@ static void test_atomics_threaded64(abts
     pthread_setconcurrency(8);
 #endif
 
+    mutex_locks64 = 0;
+    apr_atomic_set64(ops64, 0);
+
     rv = apr_thread_mutex_create(&thread_lock64, APR_THREAD_MUTEX_DEFAULT, p);
     APR_ASSERT_SUCCESS(tc, "Could not create lock", rv);
 
     for (i = 0; i < NUM_THREADS; i++) {
         apr_status_t r1, r2;
         r1 = apr_thread_create(&t1[i], NULL, thread_func_mutex64, NULL, p);
-        r2 = apr_thread_create(&t2[i], NULL, thread_func_atomic64, NULL, p);
+        r2 = apr_thread_create(&t2[i], NULL, thread_func_atomic64, ops64, p);
         ABTS_ASSERT(tc, "Failed creating threads", !r1 && !r2);
     }
 
@@ -683,7 +696,7 @@ static void test_atomics_threaded64(abts
 
     ABTS_ULLONG_EQUAL(tc, NUM_THREADS * NUM_ITERATIONS, mutex_locks64);
     ABTS_ULLONG_EQUAL(tc, NUM_THREADS * NUM_ITERATIONS,
-                      apr_atomic_read64(&atomic_ops64));
+                      apr_atomic_read64(ops64));
 
     rv = apr_thread_mutex_destroy(thread_lock64);
     ABTS_ASSERT(tc, "Failed creating threads", rv == APR_SUCCESS);
@@ -878,11 +891,12 @@ static void test_atomics_busyloop_thread
 
 static void *APR_THREAD_FUNC test_func_set64(apr_thread_t *thd, void *data)
 {
+    apr_uint64_t *ops64 = data;
     int i;
 
     for (i = 0; i < 1000 * 1000; i++) {
-        apr_atomic_set64(&atomic_ops64, APR_UINT64_C(0x1111222233334444));
-        apr_atomic_set64(&atomic_ops64, APR_UINT64_C(0x4444555566667777));
+        apr_atomic_set64(ops64, APR_UINT64_C(0x1111222233334444));
+        apr_atomic_set64(ops64, APR_UINT64_C(0x4444555566667777));
     }
 
     apr_thread_exit(thd, APR_SUCCESS);
@@ -891,16 +905,17 @@ static void *APR_THREAD_FUNC test_func_s
 
 static void test_atomics_threaded_setread64(abts_case *tc, void *data)
 {
-    apr_status_t retval;
+    apr_uint64_t *ops64 = data;
     apr_thread_t *thread;
+    apr_status_t retval;
     int i;
 
-    apr_atomic_set64(&atomic_ops64, APR_UINT64_C(0x1111222233334444));
+    apr_atomic_set64(ops64, APR_UINT64_C(0x1111222233334444));
 
-    apr_thread_create(&thread, NULL, test_func_set64, NULL, p);
+    apr_thread_create(&thread, NULL, test_func_set64, ops64, p);
 
     for (i = 0; i < 1000 * 1000 * 2; i++) {
-        apr_uint64_t val = apr_atomic_read64(&atomic_ops64);
+        apr_uint64_t val = apr_atomic_read64(ops64);
 
         if (val != APR_UINT64_C(0x1111222233334444) &&
             val != APR_UINT64_C(0x4444555566667777))
@@ -950,10 +965,12 @@ abts_suite *testatomic(abts_suite *suite
 
 #if APR_HAS_THREADS
     abts_run_test(suite, test_atomics_threaded, NULL);
-    abts_run_test(suite, test_atomics_threaded64, NULL);
+    abts_run_test(suite, test_atomics_threaded64, &atomic_ops64);
+    abts_run_test(suite, test_atomics_threaded64, &atomic_pad.ops64);
     abts_run_test(suite, test_atomics_busyloop_threaded, NULL);
     abts_run_test(suite, test_atomics_busyloop_threaded64, NULL);
-    abts_run_test(suite, test_atomics_threaded_setread64, NULL);
+    abts_run_test(suite, test_atomics_threaded_setread64, &atomic_ops64);
+    abts_run_test(suite, test_atomics_threaded_setread64, &atomic_pad.ops64);
 #endif
 
     return suite;