Posted to commits@mynewt.apache.org by an...@apache.org on 2019/07/12 11:44:30 UTC

[mynewt-core] branch master updated: libc/baselibc: Add optimized memset for Cortex-M

This is an automated email from the ASF dual-hosted git repository.

andk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mynewt-core.git


The following commit(s) were added to refs/heads/master by this push:
     new 625afe1  libc/baselibc: Add optimized memset for Cortex-M
625afe1 is described below

commit 625afe14e3f9b5857d5f2f89c0a9b02a65bbc266
Author: Andrzej Kaczmarek <an...@codecoup.pl>
AuthorDate: Sun Jul 7 18:17:05 2019 +0200

    libc/baselibc: Add optimized memset for Cortex-M
    
    This optimized memset() uses aligned word-size stores to speed up the
    process and is ~4 times faster than a simple byte loop.
---
 libc/baselibc/src/memset.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
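
For readers who prefer C to Thumb assembly, the approach the added code takes
can be rendered roughly as follows. This sketch is illustrative only and is
not part of the commit; the function name and the explicit small-buffer guard
are invented here:

    #include <stddef.h>
    #include <stdint.h>

    /*
     * Illustrative C rendering of the word-fill approach used by the
     * assembly below (not part of the commit).
     */
    void *memset_words(void *dst, int c, size_t n)
    {
        unsigned char *q = dst;
        unsigned char b = (unsigned char)c;
        /* replicate the 8-bit value into all 4 bytes of a word */
        uint32_t w = b * 0x01010101u;
        /* number of bytes past the last word-aligned address */
        size_t tail = ((uintptr_t)q + n) & 3;

        if (tail > n) {
            tail = n;   /* buffer too small to reach a word boundary */
        }

        /* fill non-word-aligned bytes at the end of the buffer */
        while (tail--) {
            q[--n] = b;
        }
        /* fill the word-aligned middle with word-size stores, back to
         * front; q + n is word-aligned by construction */
        while (n >= 4) {
            n -= 4;
            *(uint32_t *)(q + n) = w;
        }
        /* fill remaining non-word-aligned bytes at the start */
        while (n--) {
            q[n] = b;
        }
        return dst;
    }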

diff --git a/libc/baselibc/src/memset.c b/libc/baselibc/src/memset.c
index aa00b5b..7fd6b47 100644
--- a/libc/baselibc/src/memset.c
+++ b/libc/baselibc/src/memset.c
@@ -20,6 +20,47 @@ void *memset(void *dst, int c, size_t n)
 		      :"+c" (nq), "+D" (q)
 		      : "a" ((unsigned char)c * 0x0101010101010101U),
 			"r" ((uint32_t) n & 7));
+#elif defined(__arm__)
+    asm volatile (".syntax unified                          \n"
+                  /* copy 8-bit value to all 4 bytes in word */
+#if __CORTEX_M < 3
+                  /* Cortex-M0 does not support flexible 2nd operand */
+                  "   uxtb %[val], %[val]                   \n"
+                  "   lsls  r4, %[val], #8                  \n"
+                  "   orrs %[val], %[val],r4                \n"
+                  "   lsls  r4, %[val], #16                 \n"
+                  "   orrs %[val], %[val], r4               \n"
+#else
+                  "   uxtb %[val], %[val]                   \n"
+                  "   orr  %[val], %[val], %[val], lsl#8    \n"
+                  "   orr  %[val], %[val], %[val], lsl#16   \n"
+#endif
+                  /* calculate the length of data up to the last word-aligned address */
+                  "   adds r3, %[buf], %[len]               \n"
+                  "   movs r4, #3                           \n"
+                  "   ands r3, r3, r4                       \n"
+                  "   subs r3, %[len], r3                   \n"
+                  "   b    2f                               \n"
+                  /* fill non-word-aligned bytes at the end of the buffer */
+                  "1: subs %[len], #1                       \n"
+                  "   strb %[val], [%[buf], %[len]]         \n"
+                  "2: cmp  %[len], r3                       \n"
+                  "   bne  1b                               \n"
+                  /* fill all word-aligned bytes */
+                  "   b    2f                               \n"
+                  "1: str  %[val], [%[buf], %[len]]         \n"
+                  "2: subs %[len], #4                       \n"
+                  "   bpl  1b                               \n"
+                  "   adds %[len], #4                       \n"
+                  /* fill remaining non-word-aligned bytes */
+                  "   b    2f                               \n"
+                  "1: strb %[val], [%[buf], %[len]]         \n"
+                  "2: subs %[len], #1                       \n"
+                  "   bpl  1b                               \n"
+                  : [buf] "+r" (q), [val] "+r" (c), [len] "+r" (n)
+                  :
+                  : "r3", "r4", "memory"
+                 );
 #else
 	while (n--) {
 		*q++ = c;
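
Because the assembly splits the work between three loops (tail bytes, aligned
words, head bytes), correctness depends on both the buffer's offset and its
length. A self-check along the following lines (again illustrative only, not
part of the commit) exercises every small offset/length combination against a
plain byte loop:

    #include <assert.h>
    #include <string.h>

    /*
     * Illustrative self-check (not part of the commit): compare memset()
     * against a reference byte loop for every small offset/length
     * combination, so the tail, word, and head paths are all exercised.
     */
    int main(void)
    {
        unsigned char buf[32], ref[32];
        size_t off, len, i;

        for (off = 0; off < 8; off++) {
            for (len = 0; off + len <= sizeof(buf); len++) {
                memset(buf, 0xAA, sizeof(buf));   /* canvas with guard bytes */
                memset(ref, 0xAA, sizeof(ref));
                memset(buf + off, 0x5C, len);     /* function under test */
                for (i = 0; i < len; i++) {
                    ref[off + i] = 0x5C;          /* reference byte loop */
                }
                assert(memcmp(buf, ref, sizeof(buf)) == 0);
            }
        }
        return 0;
    }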