You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nuttx.apache.org by xi...@apache.org on 2022/03/14 03:12:49 UTC

[incubator-nuttx] 06/09: xtensa_context.S: Use Zephyr's version of spilling the window register file.

This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nuttx.git

commit 5305f76b1d8858f20d36c464d4e8c670c25b41ef
Author: Abdelatif Guettouche <ab...@espressif.com>
AuthorDate: Fri Mar 11 19:36:56 2022 +0100

    xtensa_context.S: Use Zephyr's version of spilling the window register
    file.
    
    Signed-off-by: Abdelatif Guettouche <ab...@espressif.com>
---
 arch/xtensa/include/irq.h                    |  7 +--
 arch/xtensa/src/common/xtensa_asm_utils.h    | 76 ++++++++++++++++++++++++++++
 arch/xtensa/src/common/xtensa_context.S      | 44 +++++++++++++---
 arch/xtensa/src/common/xtensa_int_handlers.S |  2 +-
 4 files changed, 115 insertions(+), 14 deletions(-)

diff --git a/arch/xtensa/include/irq.h b/arch/xtensa/include/irq.h
index 9c00ba6..d21539b 100644
--- a/arch/xtensa/include/irq.h
+++ b/arch/xtensa/include/irq.h
@@ -106,13 +106,10 @@
 #endif
 
 #ifndef __XTENSA_CALL0_ABI__
-  /* Temporary space for saving stuff during window spill.
-   * REVISIT: I don't think that we need so many temporaries.
-   */
+  /* Temporary space for saving stuff during window spill. */
 
 #  define REG_TMP0          (_REG_WINDOW_TMPS + 0)
-#  define REG_TMP1          (_REG_WINDOW_TMPS + 1)
-#  define _REG_OVLY_START   (_REG_WINDOW_TMPS + 2)
+#  define _REG_OVLY_START   (_REG_WINDOW_TMPS + 1)
 #else
 #  define _REG_OVLY_START   _REG_WINDOW_TMPS
 #endif
diff --git a/arch/xtensa/src/common/xtensa_asm_utils.h b/arch/xtensa/src/common/xtensa_asm_utils.h
new file mode 100644
index 0000000..8dec082
--- /dev/null
+++ b/arch/xtensa/src/common/xtensa_asm_utils.h
@@ -0,0 +1,76 @@
+/****************************************************************************
+ * arch/xtensa/src/common/xtensa_asm_utils.h
+ *
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ ****************************************************************************/
+
+#ifndef __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H
+#define __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H
+
+/****************************************************************************
+ * Assembly Language Macros
+ ****************************************************************************/
+
+/****************************************************************************
+ *
+ * Name: SPILL_ALL_WINDOWS
+ *
+ * Spills all windowed registers (i.e. registers not visible as
+ * A0-A15) to their ABI-defined spill regions on the stack.
+ *
+ * Unlike the Xtensa HAL implementation, this code needs window overflow
+ * exceptions enabled (PS.WOE=1, PS.EXCM=0), and relies on repeated hardware
+ * exception handling to do the register spills.  The trick is to do a
+ * noop write to the high registers, which the hardware will trap
+ * (into an overflow exception) in the case where those registers are
+ * already used by an existing call frame.  Then it rotates the window
+ * and repeats until all but the A0-A3 registers of the original frame
+ * are guaranteed to be spilled, eventually rotating back around into
+ * the original frame.  Advantages:
+ *
+ * - Vastly smaller code size
+ *
+ * - More easily maintained if changes are needed to window over/underflow
+ *   exception handling.
+ *
+ * - Requires no scratch registers to do its work, so can be used safely in
+ *   any context.
+ *
+ * - If the WOE bit is not enabled (for example, in code written for
+ *   the CALL0 ABI), this becomes a silent noop and operates compatibly.
+ *
+ * - Hilariously it's ACTUALLY FASTER than the HAL routine.  And not
+ *   just a little bit, it's MUCH faster.  With a mostly full register
+ *   file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
+ *   registers with this vs. 279 (!) to do it with
+ *   xthal_spill_windows().
+ ****************************************************************************/
+
+.macro SPILL_ALL_WINDOWS
+#if XCHAL_NUM_AREGS == 64
+  and a12, a12, a12    /* No-op write; overflow trap spills if in use */
+  rotw 3               /* Rotate the window, then repeat the probe */
+  and a12, a12, a12
+  rotw 3
+  and a12, a12, a12
+  rotw 3
+  and a12, a12, a12
+  rotw 3
+  and a12, a12, a12
+  rotw 4               /* Rotations total 3+3+3+3+4 = 16 */
+#elif XCHAL_NUM_AREGS == 32
+  and a12, a12, a12    /* Same no-op write trick as the 64-register case */
+  rotw 3
+  and a12, a12, a12
+  rotw 3
+  and a4, a4, a4       /* Final step probes a4 rather than a12 */
+  rotw 2               /* Rotations total 3+3+2 = 8 */
+#else
+#error Unrecognized XCHAL_NUM_AREGS
+#endif
+.endm
+
+#endif /* __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H */
diff --git a/arch/xtensa/src/common/xtensa_context.S b/arch/xtensa/src/common/xtensa_context.S
index 845e0c5..2d2d9d9 100644
--- a/arch/xtensa/src/common/xtensa_context.S
+++ b/arch/xtensa/src/common/xtensa_context.S
@@ -68,6 +68,7 @@
 
 #include "syscall.h"
 #include "xtensa_swi.h"
+#include "xtensa_asm_utils.h"
 
 /****************************************************************************
  * Public Functions
@@ -160,16 +161,43 @@ _xtensa_context_save:
 #error Overlay support is not implemented
 #endif
 
-	s32i	a0, sp, (4 * REG_TMP0)			/* Save return address */
-	s32i	sp, sp, (4 * REG_TMP1)			/* Save current stack pointer */
-	wsr		sp, EXCSAVE_1					/* Preserve register save area */
+	/* SPILL_ALL_WINDOWS macro requires window overflow exceptions to be enabled,
+	 * i.e. PS.EXCM cleared and PS.WOE set.
+	 * Since we are going to clear PS.EXCM, we also need to increase INTLEVEL
+	 * at least to XCHAL_EXCM_LEVEL. This matches the effective INTLEVEL at entry
+	 * (CINTLEVEL = max(PS.INTLEVEL, XCHAL_EXCM_LEVEL) when PS.EXCM is set).
+	 * Since WindowOverflow exceptions will trigger inside SPILL_ALL_WINDOWS,
+	 * we need to save/restore EPC1 as well.
+	 * NOTE: Even though a4-a15 are saved into the exception frame, we should not
+	 * clobber them until after SPILL_ALL_WINDOWS. This is because these registers
+	 * may contain live windows belonging to previous frames in the call stack.
+	 * These frames will be spilled by SPILL_ALL_WINDOWS, and if the register was
+	 * used as a temporary by this code, the temporary value would get stored
+	 * onto the stack, instead of the real value.
+	 */
+
+	s32i    a0, sp, (4 * REG_TMP0)     /* Save return address */
+	rsr     a2, PS                     /* To be restored after SPILL_ALL_WINDOWS */
+	movi    a0, PS_INTLEVEL_MASK
+	and     a3, a2, a0                 /* Get the current INTLEVEL */
+	bgeui   a3, XCHAL_EXCM_LEVEL, 1f   /* Calculate max(INTLEVEL, XCHAL_EXCM_LEVEL) */
+	movi    a3, XCHAL_EXCM_LEVEL
+1:
+	movi    a0, PS_UM | PS_WOE         /* Clear EXCM, enable window overflow, set new INTLEVEL */
+	or      a3, a3, a0
+	wsr     a3, ps
+	rsync
+	rsr     a0, EPC1                   /* To be restored after SPILL_ALL_WINDOWS */
 
-	l32i	sp, sp, (4 * REG_A1)			/* Restore the interruptee's SP */
-	call0	_xtensa_window_spill			/* Preserves only a4-a5, a8-a9, a12-a13 */
+	addi    sp,  sp, XCPTCONTEXT_SIZE  /* Go back to spill register region */
+	SPILL_ALL_WINDOWS                  /* Place the live register windows there */
+	addi    sp,  sp, -XCPTCONTEXT_SIZE /* Return the current stack pointer and proceed with context save*/
+
+	wsr     a2, PS                     /* Restore PS to the value at entry */
+	wsr     a0, EPC1                   /* Restore EPC1 to the value at entry */
+	rsync
+	l32i  a0, sp, (4 * REG_TMP0)       /* Restore return address */
 
-	rsr		sp, EXCSAVE_1					/* Save interruptee's a0 */
-	l32i	a0, sp, (4 * REG_TMP0)			/* Save return address */
-	l32i	sp, sp, (4 * REG_TMP1)			/* Save current stack pointer */
 #endif
 
 	ret
diff --git a/arch/xtensa/src/common/xtensa_int_handlers.S b/arch/xtensa/src/common/xtensa_int_handlers.S
index 2bdb997..e2b71dd 100644
--- a/arch/xtensa/src/common/xtensa_int_handlers.S
+++ b/arch/xtensa/src/common/xtensa_int_handlers.S
@@ -352,7 +352,7 @@ _xtensa_level1_handler:
 	l32i	a0, a2, (4 * REG_PC)			/* Retrieve interruptee's PC */
 	wsr		a0, EPC_1
 	l32i	a0, a2, (4 * REG_A0)			/* Retrieve interruptee's A0 */
-  l32i  sp, a2, (4 * REG_A1)      /* Retrieve interrupt stack frame */
+	l32i  sp, a2, (4 * REG_A1)      /* Retrieve interrupt stack frame */
 	l32i	a2, a2, (4 * REG_A2)			/* Retrieve interruptee's A2 */
 	rsync								          	/* Ensure PS and EPC written */