You are viewing a plain text version of this content; the canonical HTML version is available at the mailing-list archive.
Posted to commits@mynewt.apache.org by an...@apache.org on 2019/07/12 11:44:30 UTC
[mynewt-core] branch master updated: libc/baselibc: Add optimized
memset for Cortex-M
This is an automated email from the ASF dual-hosted git repository.
andk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mynewt-core.git
The following commit(s) were added to refs/heads/master by this push:
new 625afe1 libc/baselibc: Add optimized memset for Cortex-M
625afe1 is described below
commit 625afe14e3f9b5857d5f2f89c0a9b02a65bbc266
Author: Andrzej Kaczmarek <an...@codecoup.pl>
AuthorDate: Sun Jul 7 18:17:05 2019 +0200
libc/baselibc: Add optimized memset for Cortex-M
This optimized memset() uses aligned word-size stores to speed up the
process and is ~4 times faster than a simple loop.
---
libc/baselibc/src/memset.c | 41 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/libc/baselibc/src/memset.c b/libc/baselibc/src/memset.c
index aa00b5b..7fd6b47 100644
--- a/libc/baselibc/src/memset.c
+++ b/libc/baselibc/src/memset.c
@@ -20,6 +20,47 @@ void *memset(void *dst, int c, size_t n)
:"+c" (nq), "+D" (q)
: "a" ((unsigned char)c * 0x0101010101010101U),
"r" ((uint32_t) n & 7));
+#elif defined(__arm__)
+ asm volatile (".syntax unified \n"
+ /* copy 8-bit value to all 4 bytes in word */
+#if __CORTEX_M < 3
+ /* Cortex-M0 does not support flexible 2nd operand */
+ " uxtb %[val], %[val] \n"
+ " lsls r4, %[val], #8 \n"
+ " orrs %[val], %[val],r4 \n"
+ " lsls r4, %[val], #16 \n"
+ " orrs %[val], %[val], r4 \n"
+#else
+ " uxtb %[val], %[val] \n"
+ " orr %[val], %[val], %[val], lsl#8 \n"
+ " orr %[val], %[val], %[val], lsl#16 \n"
+#endif
+ /* calculate max length of data that are word aligned */
+ " adds r3, %[buf], %[len] \n"
+ " movs r4, #3 \n"
+ " ands r3, r3, r4 \n"
+ " subs r3, %[len], r3 \n"
+ " b 2f \n"
+ /* fill non-word aligned bytes at the end of buffer */
+ "1: subs %[len], #1 \n"
+ " strb %[val], [%[buf], %[len]] \n"
+ "2: cmp %[len], r3 \n"
+ " bne 1b \n"
+ /* fill all word aligned bytes */
+ " b 2f \n"
+ "1: str %[val], [%[buf], %[len]] \n"
+ "2: subs %[len], #4 \n"
+ " bpl 1b \n"
+ " adds %[len], #4 \n"
+ /* fill remaining non-word aligned bytes */
+ " b 2f \n"
+ "1: strb %[val], [%[buf], %[len]] \n"
+ "2: subs %[len], #1 \n"
+ " bpl 1b \n"
+ : [buf] "+r" (q), [val] "+r" (c), [len] "+r" (n)
+ :
+ : "r3", "r4", "memory"
+ );
#else
while (n--) {
*q++ = c;