You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2015/07/23 13:14:04 UTC

[07/43] trafficserver git commit: TS-3783 TS-3030 Add luajit v2.0.4 as a subtree

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/vm_arm.dasc
----------------------------------------------------------------------
diff --git a/lib/luajit/src/vm_arm.dasc b/lib/luajit/src/vm_arm.dasc
new file mode 100644
index 0000000..457efa6
--- /dev/null
+++ b/lib/luajit/src/vm_arm.dasc
@@ -0,0 +1,4486 @@
+|// Low-level VM code for ARM CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch arm
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|//       The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
+|// Fixed register assignments for the interpreter.
+|
+|// The following must be C callee-save.
+|.define MASKR8,	r4	// 255*8 constant for fast bytecode decoding.
+|.define KBASE,		r5	// Constants of current Lua function.
+|.define PC,		r6	// Next PC.
+|.define DISPATCH,	r7	// Opcode dispatch table.
+|.define LREG,		r8	// Register holding lua_State (also in SAVE_L).
+|
+|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+.
+|.define BASE,		r9	// Base of current Lua stack frame.
+|
+|// The following temporaries are not saved across C calls, except for RA/RC.
+|.define RA,		r10	// Callee-save.
+|.define RC,		r11	// Callee-save.
+|.define RB,		r12
+|.define OP,		r12	// Overlaps RB, must not be lr.
+|.define INS,		lr
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1,		r0
+|.define CARG2,		r1
+|.define CARG3,		r2
+|.define CARG4,		r3
+|.define CARG12,	r0	// For 1st soft-fp double.
+|.define CARG34,	r2	// For 2nd soft-fp double.
+|
+|.define CRET1,		r0
+|.define CRET2,		r1
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.define SAVE_R4,	[sp, #28]
+|.define CFRAME_SPACE,	#28
+|.define SAVE_ERRF,	[sp, #24]
+|.define SAVE_NRES,	[sp, #20]
+|.define SAVE_CFRAME,	[sp, #16]
+|.define SAVE_L,	[sp, #12]
+|.define SAVE_PC,	[sp, #8]
+|.define SAVE_MULTRES,	[sp, #4]
+|.define ARG5,		[sp]
+|
+|.define TMPDhi,	[sp, #4]
+|.define TMPDlo,	[sp]
+|.define TMPD,		[sp]
+|.define TMPDp,		sp
+|
+|.if FPU  // Variant that additionally preserves VFP registers d8-d15.
+|.macro saveregs  // Build the C frame: save registers, then reserve the SAVE_* slots.
+|  push {r5, r6, r7, r8, r9, r10, r11, lr}  // r4 is not pushed here; it is stored separately below.
+|  vpush {d8-d15}
+|  sub sp, sp, CFRAME_SPACE+4  // Reserve the SAVE_* slots plus one extra word for r4.
+|  str r4, SAVE_R4  // r4 goes to [sp, #28], the same slot it occupies in the non-FPU layout.
+|.endmacro
+|.macro restoreregs_ret  // Exact inverse of saveregs; popping pc performs the return.
+|  ldr r4, SAVE_R4
+|  add sp, sp, CFRAME_SPACE+4
+|  vpop {d8-d15}
+|  pop {r5, r6, r7, r8, r9, r10, r11, pc}  // The saved lr is loaded into pc.
+|.endmacro
+|.else  // Variant without VFP register saves.
+|.macro saveregs  // Build the C frame: save registers, then reserve the SAVE_* slots.
+|  push {r4, r5, r6, r7, r8, r9, r10, r11, lr}  // r4 is the lowest register, so it sits at the lowest address.
+|  sub sp, sp, CFRAME_SPACE  // After this, the pushed r4 is at [sp, #28] (= SAVE_R4).
+|.endmacro
+|.macro restoreregs_ret  // Exact inverse of saveregs; popping pc performs the return.
+|  add sp, sp, CFRAME_SPACE
+|  pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}  // The saved lr is loaded into pc.
+|.endmacro
+|.endif
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L,		lua_State,	LREG
+|.type GL,		global_State
+|.type TVALUE,		TValue
+|.type GCOBJ,		GCobj
+|.type STR,		GCstr
+|.type TAB,		GCtab
+|.type LFUNC,		GCfuncL
+|.type CFUNC,		GCfuncC
+|.type PROTO,		GCproto
+|.type UPVAL,		GCupval
+|.type NODE,		Node
+|.type NARGS8,		int
+|.type TRACE,		GCtrace
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; ud; .endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_FUNC,	#-8
+|.define FRAME_PC,	#-4
+|
+|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro  // dst = (ins bits 8-15) * 8, since MASKR8 = 255*8.
+|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro  // dst = (ins bits 24-31) * 8.
+|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro  // dst = (ins bits 16-23) * 8.
+|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro  // dst = ins bits 16-31, unscaled.
+|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro  // dst = ins bits 0-7.
+|
+|// Instruction fetch.
+|.macro ins_NEXT1  // Step 1: fetch the dispatch index.
+|  ldrb OP, [PC]  // OP = first byte of the instruction word at PC; PC is not advanced.
+|.endmacro
+|.macro ins_NEXT2  // Step 2: fetch the whole instruction.
+|   ldr INS, [PC], #4  // INS = 32-bit word at PC; post-increment PC by 4.
+|.endmacro
+|// Instruction decode+dispatch.
+|.macro ins_NEXT3  // Step 3: needs OP from ins_NEXT1 and INS from ins_NEXT2.
+|  ldr OP, [DISPATCH, OP, lsl #2]  // OP = DISPATCH[OP]; the table has 4-byte entries.
+|   decode_RA8 RA, INS  // RA = (INS bits 8-15) * 8, a byte offset and not yet an address.
+|   decode_RD RC, INS  // RC = INS bits 16-31.
+|  bx OP  // Jump to the address loaded from the dispatch table.
+|.endmacro
+|.macro ins_NEXT  // Complete fetch + decode + dispatch sequence.
+|  ins_NEXT1
+|  ins_NEXT2
+|  ins_NEXT3
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+|  .define ins_next, ins_NEXT
+|  .define ins_next_, ins_NEXT
+|  .define ins_next1, ins_NEXT1
+|  .define ins_next2, ins_NEXT2
+|  .define ins_next3, ins_NEXT3
+|.else
+|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+|  // Affects only certain kinds of benchmarks (and only with -j off).
+|  .macro ins_next
+|    b ->ins_next
+|  .endmacro
+|  .macro ins_next1
+|  .endmacro
+|  .macro ins_next2
+|  .endmacro
+|  .macro ins_next3
+|    b ->ins_next
+|  .endmacro
+|  .macro ins_next_
+|  ->ins_next:
+|    ins_NEXT
+|  .endmacro
+|.endif
+|
+|// Avoid register name substitution for field name.
+#define field_pc	pc
+|
+|// Call decode and dispatch.
+|.macro ins_callt  // Enter the function in CARG3 without storing a caller PC.
+|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+|  ldr PC, LFUNC:CARG3->field_pc  // PC = pc field of the function being called.
+|  ldrb OP, [PC]  // STALL: load PC. early PC.
+|   ldr INS, [PC], #4  // Fetch the instruction word and advance PC by 4.
+|  ldr OP, [DISPATCH, OP, lsl #2]  // STALL: load OP. early OP.
+|   decode_RA8 RA, INS  // RA = (INS bits 8-15) * 8.
+|   add RA, RA, BASE  // RA becomes an address here (ins_NEXT3 leaves it as an offset).
+|  bx OP  // Dispatch. RC is left untouched and still holds nargs*8.
+|.endmacro
+|
+|.macro ins_call  // Like ins_callt, but first records the caller PC in the new frame.
+|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+|  str PC, [BASE, FRAME_PC]  // Store caller PC at BASE-4.
+|  ins_callt  // STALL: locked PC.
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to test operand types.
+|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro  // Set flags from reg + (-tp); EQ iff reg == tp.
+|.macro checktpeq, reg, tp; cmneq reg, #-tp; .endmacro  // Same test, executed only if the flags are currently EQ.
+|.macro checktpne, reg, tp; cmnne reg, #-tp; .endmacro  // Same test, executed only if the flags are currently NE.
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro  // Branch to target unless reg == LJ_TSTR.
+|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro  // Branch to target unless reg == LJ_TTAB.
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro  // Branch to target unless reg == LJ_TFUNC.
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|.macro hotcheck, delta  // Decrement the 16-bit counter selected by PC. Clobbers CARG1, CARG2 and the flags.
+|  lsr CARG1, PC, #1
+|  and CARG1, CARG1, #126  // CARG1 = (PC >> 1) & 126: an even byte offset in 0..126.
+|  sub CARG1, CARG1, #-GG_DISP2HOT  // Adds GG_DISP2HOT, written as subtracting its negation.
+|  ldrh CARG2, [DISPATCH, CARG1]  // Load the halfword counter.
+|  subs CARG2, CARG2, #delta  // The flags set here are tested by the blo in hotloop/hotcall.
+|  strh CARG2, [DISPATCH, CARG1]  // Write the counter back; a store does not change the flags.
+|.endmacro
+|
+|.macro hotloop  // Count a loop iteration.
+|  hotcheck HOTCOUNT_LOOP
+|  blo ->vm_hotloop  // Taken when the subtraction borrowed (counter was below the delta).
+|.endmacro
+|
+|.macro hotcall  // Count a call.
+|  hotcheck HOTCOUNT_CALL
+|  blo ->vm_hotcall  // Taken when the subtraction borrowed (counter was below the delta).
+|.endmacro
+|
+|// Set current VM state.
+|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro
+|.macro st_vmstate, reg; str reg, [DISPATCH, #DISPATCH_GL(vmstate)]; .endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp  // mark must already hold tab->marked; the macro does not load it.
+|  ldr tmp, [DISPATCH, #DISPATCH_GL(gc.grayagain)]  // tmp = current value of gc.grayagain.
+|   bic mark, mark, #LJ_GC_BLACK		// black2gray(tab)
+|  str tab, [DISPATCH, #DISPATCH_GL(gc.grayagain)]  // gc.grayagain = tab.
+|   strb mark, tab->marked  // Store the mark byte with LJ_GC_BLACK cleared.
+|  str tmp, tab->gclist  // tab->gclist = previous gc.grayagain value.
+|.endmacro
+|
+|.macro .IOS, a, b  // Emit one two-operand statement only when IOS is set, e.g. ".IOS ldr BASE, L->base".
+|.if IOS
+|  a, b  // The comma of the guarded statement splits it into a and b; rejoin it here.
+|.endif
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for ARM target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+  |.code_sub
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Return handling ----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_returnp:
+  |  // See vm_return. Also: RB = previous base.
+  |  tst PC, #FRAME_P
+  |  beq ->cont_dispatch
+  |
+  |  // Return from pcall or xpcall fast func.
+  |  ldr PC, [RB, FRAME_PC]		// Fetch PC of previous frame.
+  |   mvn CARG2, #~LJ_TTRUE
+  |  mov BASE, RB
+  |  // Prepending may overwrite the pcall frame, so do it at the end.
+  |   str CARG2, [RA, FRAME_PC]		// Prepend true to results.
+  |  sub RA, RA, #8
+  |
+  |->vm_returnc:
+  |  adds RC, RC, #8			// RC = (nresults+1)*8.
+  |  mov CRET1, #LUA_YIELD
+  |  beq ->vm_unwind_c_eh
+  |  str RC, SAVE_MULTRES
+  |  ands CARG1, PC, #FRAME_TYPE
+  |  beq ->BC_RET_Z			// Handle regular return to Lua.
+  |
+  |->vm_return:
+  |  // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
+  |  // CARG1 = PC & FRAME_TYPE
+  |  bic RB, PC, #FRAME_TYPEP
+  |   cmp CARG1, #FRAME_C
+  |  sub RB, BASE, RB			// RB = previous base.
+  |   bne ->vm_returnp
+  |
+  |  str RB, L->base
+  |   ldr KBASE, SAVE_NRES
+  |    mv_vmstate CARG4, C
+  |   sub BASE, BASE, #8
+  |  subs CARG3, RC, #8
+  |   lsl KBASE, KBASE, #3		// KBASE = (nresults_wanted+1)*8
+  |    st_vmstate CARG4
+  |  beq >2
+  |1:
+  |  subs CARG3, CARG3, #8
+  |   ldrd CARG12, [RA], #8
+  |   strd CARG12, [BASE], #8
+  |  bne <1
+  |2:
+  |  cmp KBASE, RC			// More/less results wanted?
+  |  bne >6
+  |3:
+  |  str BASE, L->top			// Store new top.
+  |
+  |->vm_leave_cp:
+  |  ldr RC, SAVE_CFRAME		// Restore previous C frame.
+  |   mov CRET1, #0			// Ok return status for vm_pcall.
+  |  str RC, L->cframe
+  |
+  |->vm_leave_unw:
+  |  restoreregs_ret
+  |
+  |6:
+  |  blt >7				// Less results wanted?
+  |  // More results wanted. Check stack size and fill up results with nil.
+  |  ldr CARG3, L->maxstack
+  |   mvn CARG2, #~LJ_TNIL
+  |  cmp BASE, CARG3
+  |  bhs >8
+  |   str CARG2, [BASE, #4]
+  |  add RC, RC, #8
+  |  add BASE, BASE, #8
+  |  b <2
+  |
+  |7:  // Less results wanted.
+  |  sub CARG1, RC, KBASE
+  |  cmp KBASE, #0			// LUA_MULTRET+1 case?
+  |  subne BASE, BASE, CARG1		// Either keep top or shrink it.
+  |  b <3
+  |
+  |8:  // Corner case: need to grow stack for filling up results.
+  |  // This can happen if:
+  |  // - A C function grows the stack (a lot).
+  |  // - The GC shrinks the stack in between.
+  |  // - A return back from a lua_call() with (high) nresults adjustment.
+  |  str BASE, L->top			// Save current top held in BASE (yes).
+  |  lsr CARG2, KBASE, #3
+  |  mov CARG1, L
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldr BASE, L->top			// Need the (realloced) L->top in BASE.
+  |  b <2
+  |
+  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
+  |  // (void *cframe, int errcode)
+  |  mov sp, CARG1  // sp = cframe, so the SAVE_* slots below address that frame.
+  |  mov CRET1, CARG2  // Return value = errcode.
+  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |  ldr L, SAVE_L  // Reload L from the frame.
+  |   mv_vmstate CARG4, C
+  |  ldr GL:CARG3, L->glref  // vmstate is written through L->glref here, not through DISPATCH.
+  |   str CARG4, GL:CARG3->vmstate
+  |  b ->vm_leave_unw  // restoreregs_ret there pops the frame and returns with CRET1 intact.
+  |
+  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
+  |  // (void *cframe)
+  |  bic CARG1, CARG1, #~CFRAME_RAWMASK	// Use two steps: bic sp is deprecated.
+  |  mov sp, CARG1
+  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  ldr L, SAVE_L
+  |   mov MASKR8, #255
+  |    mov RC, #16			// 2 results: false + error message.
+  |   lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
+  |  ldr BASE, L->base
+  |   ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |    mvn CARG1, #~LJ_TFALSE
+  |  sub RA, BASE, #8			// Results start at BASE-8.
+  |  ldr PC, [BASE, FRAME_PC]		// Fetch PC of previous frame.
+  |   add DISPATCH, DISPATCH, #GG_G2DISP
+  |   mv_vmstate CARG2, INTERP
+  |    str CARG1, [BASE, #-4]		// Prepend false to error message.
+  |   st_vmstate CARG2
+  |  b ->vm_returnc
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Grow stack for calls -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_growstack_c:			// Grow stack for C function.
+  |  // CARG1 = L
+  |  mov CARG2, #LUA_MINSTACK  // n = LUA_MINSTACK.
+  |  b >2
+  |
+  |->vm_growstack_l:			// Grow stack for Lua function.
+  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+  |  add RC, BASE, RC  // RC = BASE + nargs*8, stored as L->top below.
+  |   sub RA, RA, BASE  // RA = framesize*8.
+  |    mov CARG1, L
+  |  str BASE, L->base
+  |   add PC, PC, #4			// Must point after first instruction.
+  |  str RC, L->top
+  |   lsr CARG2, RA, #3  // n = framesize (RA / 8).
+  |2:
+  |  // L->base = new base, L->top = top
+  |  str PC, SAVE_PC
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldr BASE, L->base  // Reload BASE and top from L after the call.
+  |   ldr RC, L->top
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Re-fetch the function; ins_callt expects it in CARG3.
+  |   sub NARGS8:RC, RC, BASE  // RC = top - base = nargs*8 again.
+  |  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+  |  ins_callt				// Just retry the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Entry points into the assembler VM ---------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_resume:				// Setup C frame and resume thread.
+  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+  |  saveregs
+  |  mov L, CARG1
+  |    ldr DISPATCH, L:CARG1->glref	// Setup pointer to dispatch table.
+  |  mov BASE, CARG2
+  |    add DISPATCH, DISPATCH, #GG_G2DISP
+  |   str L, SAVE_L
+  |  mov PC, #FRAME_CP
+  |   str CARG3, SAVE_NRES
+  |    add CARG2, sp, #CFRAME_RESUME
+  |  ldrb CARG1, L->status
+  |   str CARG3, SAVE_ERRF
+  |    str CARG2, L->cframe
+  |   str CARG3, SAVE_CFRAME
+  |  cmp CARG1, #0
+  |   str L, SAVE_PC			// Any value outside of bytecode is ok.
+  |  beq >3
+  |
+  |  // Resume after yield (like a return).
+  |  mov RA, BASE
+  |   ldr BASE, L->base
+  |   ldr CARG1, L->top
+  |    mov MASKR8, #255
+  |     strb CARG3, L->status
+  |   sub RC, CARG1, BASE
+  |  ldr PC, [BASE, FRAME_PC]
+  |    lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
+  |     mv_vmstate CARG2, INTERP
+  |   add RC, RC, #8
+  |  ands CARG1, PC, #FRAME_TYPE
+  |     st_vmstate CARG2
+  |   str RC, SAVE_MULTRES
+  |  beq ->BC_RET_Z
+  |  b ->vm_return
+  |
+  |->vm_pcall:				// Setup protected C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+  |  saveregs
+  |  mov PC, #FRAME_CP
+  |  str CARG4, SAVE_ERRF
+  |  b >1
+  |
+  |->vm_call:				// Setup C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1)
+  |  saveregs
+  |  mov PC, #FRAME_C
+  |
+  |1:  // Entry point for vm_pcall above (PC = ftype).
+  |  ldr RC, L:CARG1->cframe
+  |   str CARG3, SAVE_NRES
+  |    mov L, CARG1
+  |   str CARG1, SAVE_L
+  |    mov BASE, CARG2
+  |  str sp, L->cframe			// Add our C frame to cframe chain.
+  |    ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
+  |  str RC, SAVE_CFRAME
+  |    add DISPATCH, DISPATCH, #GG_G2DISP
+  |
+  |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  ldr RB, L->base			// RB = old base (for vmeta_call).
+  |   ldr CARG1, L->top
+  |    mov MASKR8, #255
+  |  add PC, PC, BASE
+  |    lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
+  |  sub PC, PC, RB			// PC = frame delta + frame type
+  |    mv_vmstate CARG2, INTERP
+  |   sub NARGS8:RC, CARG1, BASE
+  |    st_vmstate CARG2
+  |
+  |->vm_call_dispatch:
+  |  // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
+  |  ldrd CARG34, [BASE, FRAME_FUNC]
+  |  checkfunc CARG4, ->vmeta_call
+  |
+  |->vm_call_dispatch_f:
+  |  ins_call
+  |  // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
+  |
+  |->vm_cpcall:				// Setup protected C frame, call C.
+  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+  |  saveregs
+  |  mov L, CARG1
+  |   ldr RA, L:CARG1->stack
+  |  str CARG1, SAVE_L
+  |   ldr RB, L->top
+  |  str CARG1, SAVE_PC			// Any value outside of bytecode is ok.
+  |  ldr RC, L->cframe
+  |   sub RA, RA, RB			// Compute -savestack(L, L->top).
+  |  str sp, L->cframe			// Add our C frame to cframe chain.
+  |  mov RB, #0
+  |   str RA, SAVE_NRES			// Neg. delta means cframe w/o frame.
+  |  str RB, SAVE_ERRF			// No error function.
+  |  str RC, SAVE_CFRAME
+  |  blx CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |   ldr DISPATCH, L->glref		// Setup pointer to dispatch table.
+  |  movs BASE, CRET1
+  |    mov PC, #FRAME_CP
+  |   add DISPATCH, DISPATCH, #GG_G2DISP
+  |  bne <3				// Else continue with the call.
+  |  b ->vm_leave_cp			// No base? Just remove C frame.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Metamethod handling ------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-- Continuation dispatch ----------------------------------------------
+  |
+  |->cont_dispatch:
+  |  // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
+  |  ldr LFUNC:CARG3, [RB, FRAME_FUNC]
+  |    ldr CARG1, [BASE, #-16]		// Get continuation.
+  |   mov CARG4, BASE
+  |   mov BASE, RB			// Restore caller BASE.
+  |.if FFI
+  |    cmp CARG1, #1
+  |.endif
+  |   ldr PC, [CARG4, #-12]		// Restore PC from [cont|PC].
+  |  ldr CARG3, LFUNC:CARG3->field_pc
+  |    mvn INS, #~LJ_TNIL
+  |    add CARG2, RA, RC
+  |    str INS, [CARG2, #-4]		// Ensure one valid arg.
+  |.if FFI
+  |    bls >1
+  |.endif
+  |  ldr KBASE, [CARG3, #PC2PROTO(k)]
+  |  // BASE = base, RA = resultptr, CARG4 = meta base
+  |    bx CARG1
+  |
+  |.if FFI
+  |1:
+  |  beq ->cont_ffi_callback		// cont = 1: return from FFI callback.
+  |  // cont = 0: tailcall from C function.
+  |  sub CARG4, CARG4, #16
+  |  sub RC, CARG4, BASE
+  |  b ->vm_call_tail
+  |.endif
+  |
+  |->cont_cat:				// RA = resultptr, CARG4 = meta base
+  |  ldr INS, [PC, #-4]
+  |   sub CARG2, CARG4, #16
+  |   ldrd CARG34, [RA]
+  |     str BASE, L->base
+  |  decode_RB8 RC, INS
+  |   decode_RA8 RA, INS
+  |  add CARG1, BASE, RC
+  |  subs CARG1, CARG2, CARG1
+  |   strdne CARG34, [CARG2]
+  |   movne CARG3, CARG1
+  |  bne ->BC_CAT_Z
+  |   strd CARG34, [BASE, RA]
+  |  b ->cont_nop
+  |
+  |//-- Table indexing metamethods -----------------------------------------
+  |
+  |->vmeta_tgets1:
+  |  add CARG2, BASE, RB
+  |  b >2
+  |
+  |->vmeta_tgets:
+  |  sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv)
+  |   mvn CARG4, #~LJ_TTAB
+  |  str TAB:RB, [CARG2]
+  |   str CARG4, [CARG2, #4]
+  |2:
+  |   mvn CARG4, #~LJ_TSTR
+  |  str STR:RC, TMPDlo
+  |   str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tgetb:			// RC = index
+  |  decode_RB8 RB, INS
+  |   str RC, TMPDlo
+  |   mvn CARG4, #~LJ_TISNUM
+  |  add CARG2, BASE, RB
+  |   str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tgetv:
+  |  add CARG2, BASE, RB
+  |   add CARG3, BASE, RC
+  |1:
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |  beq >3
+  |  ldrd CARG34, [CRET1]
+  |   ins_next1
+  |   ins_next2
+  |  strd CARG34, [BASE, RA]
+  |   ins_next3
+  |
+  |3:  // Call __index metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k
+  |   rsb CARG1, BASE, #FRAME_CONT
+  |  ldr BASE, L->top
+  |    mov NARGS8:RC, #16		// 2 args for func(t, k).
+  |    str PC, [BASE, #-12]		// [cont|PC]
+  |   add PC, CARG1, BASE
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
+  |  b ->vm_call_dispatch_f
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->vmeta_tsets1:
+  |  add CARG2, BASE, RB
+  |  b >2
+  |
+  |->vmeta_tsets:
+  |  sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv)
+  |   mvn CARG4, #~LJ_TTAB
+  |  str TAB:RB, [CARG2]
+  |   str CARG4, [CARG2, #4]
+  |2:
+  |   mvn CARG4, #~LJ_TSTR
+  |  str STR:RC, TMPDlo
+  |   str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tsetb:			// RC = index
+  |  decode_RB8 RB, INS
+  |   str RC, TMPDlo
+  |   mvn CARG4, #~LJ_TISNUM
+  |  add CARG2, BASE, RB
+  |   str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tsetv:
+  |  add CARG2, BASE, RB
+  |   add CARG3, BASE, RC
+  |1:
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |   ldrd CARG34, [BASE, RA]
+  |  beq >3
+  |   ins_next1
+  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
+  |  strd CARG34, [CRET1]
+  |   ins_next2
+  |   ins_next3
+  |
+  |3:  // Call __newindex metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+  |   rsb CARG1, BASE, #FRAME_CONT
+  |  ldr BASE, L->top
+  |    mov NARGS8:RC, #24		// 3 args for func(t, k, v).
+  |   strd CARG34, [BASE, #16]		// Copy value to third argument.
+  |    str PC, [BASE, #-12]		// [cont|PC]
+  |   add PC, CARG1, BASE
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
+  |  b ->vm_call_dispatch_f
+  |
+  |//-- Comparison metamethods ---------------------------------------------
+  |
+  |->vmeta_comp:
+  |  mov CARG1, L
+  |   sub PC, PC, #4
+  |  mov CARG2, RA
+  |   str BASE, L->base
+  |  mov CARG3, RC
+  |   str PC, SAVE_PC
+  |  decode_OP CARG4, INS
+  |  bl extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |3:
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #1
+  |  bhi ->vmeta_binop
+  |4:
+  |  ldrh RB, [PC, #2]
+  |   add PC, PC, #4
+  |  add RB, PC, RB, lsl #2
+  |  subhs PC, RB, #0x20000
+  |->cont_nop:
+  |  ins_next
+  |
+  |->cont_ra:				// RA = resultptr. Copies the result to the slot named by the previous instruction.
+  |  ldr INS, [PC, #-4]  // Re-fetch the instruction word just before PC.
+  |   ldrd CARG12, [RA]  // Load the 8-byte result.
+  |  decode_RA8 CARG3, INS  // CARG3 = (INS bits 8-15) * 8: byte offset of the destination.
+  |   strd CARG12, [BASE, CARG3]  // Store the result at BASE + CARG3.
+  |  b ->cont_nop
+  |
+  |->cont_condt:			// RA = resultptr
+  |  ldr CARG2, [RA, #4]
+  |   mvn CARG1, #~LJ_TTRUE
+  |  cmp CARG1, CARG2			// Branch if result is true.
+  |  b <4
+  |
+  |->cont_condf:			// RA = resultptr
+  |  ldr CARG2, [RA, #4]
+  |  checktp CARG2, LJ_TFALSE		// Branch if result is false.
+  |  b <4
+  |
+  |->vmeta_equal:
+  |  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+  |  sub PC, PC, #4
+  |   str BASE, L->base
+  |   mov CARG1, L
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  b <3
+  |
+  |->vmeta_equal_cd:
+  |.if FFI
+  |  sub PC, PC, #4
+  |   str BASE, L->base
+  |   mov CARG1, L
+  |   mov CARG2, INS
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_equal_cd		// (lua_State *L, BCIns op)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  b <3
+  |.endif
+  |
+  |//-- Arithmetic metamethods ---------------------------------------------
+  |
+  |->vmeta_arith_vn:
+  |  decode_RB8 RB, INS
+  |   decode_RC8 RC, INS
+  |  add CARG3, BASE, RB
+  |   add CARG4, KBASE, RC
+  |  b >1
+  |
+  |->vmeta_arith_nv:
+  |  decode_RB8 RB, INS
+  |   decode_RC8 RC, INS
+  |  add CARG4, BASE, RB
+  |   add CARG3, KBASE, RC
+  |  b >1
+  |
+  |->vmeta_unm:
+  |  ldr INS, [PC, #-8]
+  |   sub PC, PC, #4
+  |  add CARG3, BASE, RC
+  |  add CARG4, BASE, RC
+  |  b >1
+  |
+  |->vmeta_arith_vv:
+  |  decode_RB8 RB, INS
+  |   decode_RC8 RC, INS
+  |  add CARG3, BASE, RB
+  |   add CARG4, BASE, RC
+  |1:
+  |  decode_OP OP, INS
+  |   add CARG2, BASE, RA
+  |    str BASE, L->base
+  |   mov CARG1, L
+  |    str PC, SAVE_PC
+  |  str OP, ARG5
+  |  bl extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+  |  // Returns NULL (finished) or TValue * (metamethod).
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |  beq ->cont_nop
+  |
+  |  // Call metamethod for binary op.
+  |->vmeta_binop:
+  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+  |  sub CARG2, CRET1, BASE  // CARG2 = frame delta (new base - old base).
+  |   str PC, [CRET1, #-12]		// [cont|PC]
+  |  add PC, CARG2, #FRAME_CONT  // PC = frame delta + FRAME_CONT.
+  |   mov BASE, CRET1  // Switch to the new base.
+  |    mov NARGS8:RC, #16		// 2 args for func(o1, o2).
+  |  b ->vm_call_dispatch
+  |
+  |->vmeta_len:
+  |  add CARG2, BASE, RC
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_len		// (lua_State *L, TValue *o)
+  |  // Returns NULL (retry) or TValue * (metamethod base).
+  |  .IOS ldr BASE, L->base
+#if LJ_52
+  |  cmp CRET1, #0
+  |  bne ->vmeta_binop			// Binop call for compatibility.
+  |  ldr TAB:CARG1, [BASE, RC]
+  |  b ->BC_LEN_Z
+#else
+  |  b ->vmeta_binop			// Binop call for compatibility.
+#endif
+  |
+  |//-- Call metamethod ----------------------------------------------------
+  |
+  |->vmeta_call:			// Resolve and call __call metamethod.
+  |  // RB = old base, BASE = new base, RC = nargs*8
+  |  mov CARG1, L
+  |   str RB, L->base			// This is the callers base!
+  |  sub CARG2, BASE, #8
+  |   str PC, SAVE_PC
+  |  add CARG3, BASE, NARGS8:RC
+  |  .IOS mov RA, BASE
+  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  .IOS mov BASE, RA
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Guaranteed to be a function here.
+  |   add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
+  |  ins_call
+  |
+  |->vmeta_callt:			// Resolve __call for BC_CALLT.
+  |  // BASE = old base, RA = new base, RC = nargs*8
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  sub CARG2, RA, #8
+  |   str PC, SAVE_PC
+  |  add CARG3, RA, NARGS8:RC
+  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  .IOS ldr BASE, L->base
+  |  ldr LFUNC:CARG3, [RA, FRAME_FUNC]  // Guaranteed to be a function here.
+  |   ldr PC, [BASE, FRAME_PC]
+  |    add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
+  |  b ->BC_CALLT2_Z
+  |
+  |//-- Argument coercion for 'for' statement ------------------------------
+  |
+  |->vmeta_for:
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  mov CARG2, RA
+  |   str PC, SAVE_PC
+  |  bl extern lj_meta_for	// (lua_State *L, TValue *base)
+  |  .IOS ldr BASE, L->base
+  |.if JIT
+  |   ldrb OP, [PC, #-4]
+  |.endif
+  |  ldr INS, [PC, #-4]
+  |.if JIT
+  |   cmp OP, #BC_JFORI
+  |.endif
+  |  decode_RA8 RA, INS
+  |  decode_RD RC, INS
+  |.if JIT
+  |   beq =>BC_JFORI
+  |.endif
+  |  b =>BC_FORI
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Fast functions -----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |.macro .ffunc, name  // Emit only the global label ff_<name>.
+  |->ff_ .. name:
+  |.endmacro
+  |
+  |.macro .ffunc_1, name  // Label + load 1st arg + require at least 1 argument.
+  |->ff_ .. name:
+  |  ldrd CARG12, [BASE]  // CARG1 = word at [BASE], CARG2 = word at [BASE+4]; loaded before the count check.
+  |   cmp NARGS8:RC, #8
+  |   blo ->fff_fallback  // nargs*8 < 8: no argument present.
+  |.endmacro
+  |
+  |.macro .ffunc_2, name  // Label + load 1st and 2nd arg + require at least 2 arguments.
+  |->ff_ .. name:
+  |  ldrd CARG12, [BASE]  // 1st arg in CARG1/CARG2.
+  |   ldrd CARG34, [BASE, #8]  // 2nd arg in CARG3/CARG4.
+  |    cmp NARGS8:RC, #16
+  |    blo ->fff_fallback  // nargs*8 < 16: fewer than 2 arguments.
+  |.endmacro
+  |
+  |.macro .ffunc_n, name  // .ffunc_1 plus a tag check on the 1st arg.
+  |  .ffunc_1 name
+  |  checktp CARG2, LJ_TISNUM
+  |  bhs ->fff_fallback  // Fall back if CARG2 >= LJ_TISNUM (unsigned).
+  |.endmacro
+  |
+  |.macro .ffunc_nn, name  // .ffunc_2 plus a tag check on both args.
+  |  .ffunc_2 name
+  |  checktp CARG2, LJ_TISNUM
+  |  cmnlo CARG4, #-LJ_TISNUM  // Executed only if the 1st check gave LO; otherwise its HS result is kept.
+  |  bhs ->fff_fallback  // Fall back if either word is >= LJ_TISNUM (unsigned).
+  |.endmacro
+  |
+  |.macro .ffunc_d, name  // Like .ffunc_n, but the 1st arg is also loaded into d0.
+  |  .ffunc name
+  |  ldr CARG2, [BASE, #4]  // CARG2 = word at [BASE+4], the one tested by checktp.
+  |   cmp NARGS8:RC, #8
+  |  vldr d0, [BASE]  // d0 = the 8 bytes of the 1st arg.
+  |   blo ->fff_fallback  // Uses the flags of the cmp above.
+  |  checktp CARG2, LJ_TISNUM
+  |  bhs ->fff_fallback  // Fall back if CARG2 >= LJ_TISNUM (unsigned).
+  |.endmacro
+  |
+  |.macro .ffunc_dd, name  // Like .ffunc_nn, but the args are also loaded into d0 and d1.
+  |  .ffunc name
+  |  ldr CARG2, [BASE, #4]  // Word at [BASE+4] of the 1st arg.
+  |  ldr CARG4, [BASE, #12]  // Word at [BASE+12] of the 2nd arg.
+  |   cmp NARGS8:RC, #16
+  |  vldr d0, [BASE]  // d0 = the 8 bytes of the 1st arg.
+  |  vldr d1, [BASE, #8]  // d1 = the 8 bytes of the 2nd arg.
+  |   blo ->fff_fallback  // Uses the flags of the cmp above.
+  |  checktp CARG2, LJ_TISNUM
+  |  cmnlo CARG4, #-LJ_TISNUM  // Executed only if the 1st check gave LO.
+  |  bhs ->fff_fallback  // Fall back if either word is >= LJ_TISNUM (unsigned).
+  |.endmacro
+  |
+  |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
+  |.macro ffgccheck  // Clobbers CARG1, CARG2 and the flags; lr (= INS) too when the call is taken.
+  |  ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)]
+  |  ldr CARG2, [DISPATCH, #DISPATCH_GL(gc.threshold)]
+  |  cmp CARG1, CARG2
+  |  blge ->fff_gcstep  // Call fff_gcstep when gc.total >= gc.threshold (signed compare).
+  |.endmacro
+  |
+  |//-- Base library: checks -----------------------------------------------
+  |
+  |.ffunc_1 assert
+  |  checktp CARG2, LJ_TTRUE
+  |  bhi ->fff_fallback
+  |   ldr PC, [BASE, FRAME_PC]
+  |  strd CARG12, [BASE, #-8]
+  |  mov RB, BASE
+  |  subs RA, NARGS8:RC, #8
+  |   add RC, NARGS8:RC, #8		// Compute (nresults+1)*8.
+  |  beq ->fff_res			// Done if exactly 1 argument.
+  |1:
+  |   ldrd CARG12, [RB, #8]
+  |  subs RA, RA, #8
+  |   strd CARG12, [RB], #8
+  |  bne <1
+  |  b ->fff_res
+  |
+  |.ffunc type  // Result is read from an upvalue of the C function in CARG3, indexed by the argument's tag.
+  |  ldr CARG2, [BASE, #4]  // CARG2 = word at [BASE+4] of the 1st arg (its tag).
+  |   cmp NARGS8:RC, #8
+  |   blo ->fff_fallback  // No argument present.
+  |  checktp CARG2, LJ_TISNUM
+  |  mvnlo CARG2, #~LJ_TISNUM  // Any value below LJ_TISNUM (unsigned) is replaced by LJ_TISNUM itself.
+  |  rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1  // CARG4 = upvalue offset/8 + ~tag, since -1 - tag == ~tag.
+  |  lsl CARG4, CARG4, #3  // Scale to a byte offset (8 bytes per slot).
+  |  ldrd CARG12, [CFUNC:CARG3, CARG4]  // CARG1/CARG2 = upvalue[~tag] of the function in CARG3.
+  |  b ->fff_restv
+  |
+  |//-- Base library: getters and setters ---------------------------------
+  |
+  |.ffunc_1 getmetatable
+  |  checktp CARG2, LJ_TTAB
+  |  cmnne CARG2, #-LJ_TUDATA
+  |  bne >6
+  |1:  // Field metatable must be at same offset for GCtab and GCudata!
+  |  ldr TAB:RB, TAB:CARG1->metatable
+  |2:
+  |   mvn CARG2, #~LJ_TNIL
+  |   ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])]
+  |  cmp TAB:RB, #0
+  |  beq ->fff_restv
+  |  ldr CARG3, TAB:RB->hmask
+  |   ldr CARG4, STR:RC->hash
+  |    ldr NODE:INS, TAB:RB->node
+  |  and CARG3, CARG3, CARG4		// idx = str->hash & tab->hmask
+  |  add CARG3, CARG3, CARG3, lsl #1
+  |    add NODE:INS, NODE:INS, CARG3, lsl #3	// node = tab->node + idx*3*8
+  |3:  // Rearranged logic, because we expect _not_ to find the key.
+  |  ldrd CARG34, NODE:INS->key  // STALL: early NODE:INS.
+  |   ldrd CARG12, NODE:INS->val
+  |    ldr NODE:INS, NODE:INS->next
+  |  checktp CARG4, LJ_TSTR
+  |  cmpeq CARG3, STR:RC
+  |  beq >5
+  |  cmp NODE:INS, #0
+  |  bne <3
+  |4:
+  |  mov CARG1, RB			// Use metatable as default result.
+  |  mvn CARG2, #~LJ_TTAB
+  |  b ->fff_restv
+  |5:
+  |  checktp CARG2, LJ_TNIL
+  |  bne ->fff_restv
+  |  b <4
+  |
+  |6:
+  |  checktp CARG2, LJ_TISNUM
+  |  mvnhs CARG2, CARG2
+  |  movlo CARG2, #~LJ_TISNUM
+  |  add CARG4, DISPATCH, CARG2, lsl #2
+  |  ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])]
+  |  b <2
+  |
+  |.ffunc_2 setmetatable
+  |  // Fast path: no mt for table yet and not clearing the mt.
+  |  checktp CARG2, LJ_TTAB
+  |   ldreq TAB:RB, TAB:CARG1->metatable
+  |  checktpeq CARG4, LJ_TTAB
+  |    ldrbeq CARG4, TAB:CARG1->marked
+  |   cmpeq TAB:RB, #0
+  |  bne ->fff_fallback
+  |    tst CARG4, #LJ_GC_BLACK		// isblack(table)
+  |     str TAB:CARG3, TAB:CARG1->metatable
+  |    beq ->fff_restv
+  |  barrierback TAB:CARG1, CARG4, CARG3
+  |  b ->fff_restv
+  |
+  |.ffunc rawget
+  |  ldrd CARG34, [BASE]
+  |   cmp NARGS8:RC, #16
+  |   blo ->fff_fallback
+  |   mov CARG2, CARG3
+  |  checktab CARG4, ->fff_fallback
+  |   mov CARG1, L
+  |   add CARG3, BASE, #8
+  |  .IOS mov RA, BASE
+  |  bl extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
+  |  // Returns cTValue *.
+  |  .IOS mov BASE, RA
+  |  ldrd CARG12, [CRET1]
+  |  b ->fff_restv
+  |
+  |//-- Base library: conversions ------------------------------------------
+  |
+  |.ffunc tonumber
+  |  // Only handles the number case inline (without a base argument).
+  |  ldrd CARG12, [BASE]  // CARG1/CARG2 = 1st arg; loaded before the count check.
+  |   cmp NARGS8:RC, #8
+  |   bne ->fff_fallback  // Anything other than exactly one argument goes to the fallback.
+  |  checktp CARG2, LJ_TISNUM
+  |  bls ->fff_restv  // CARG2 <= LJ_TISNUM (unsigned): pass the untouched CARG1/CARG2 pair to fff_restv.
+  |  b ->fff_fallback  // Every other tag is handled by the fallback.
+  |
+  |// tostring(x): strings are returned as-is, numbers are converted with
+  |// lj_str_fromnumber() unless a number base metatable exists. All other
+  |// cases go to the fallback handler.
+  |.ffunc_1 tostring
+  |  // Only handles the string or number case inline.
+  |  checktp CARG2, LJ_TSTR
+  |  // A __tostring method in the string base metatable is ignored.
+  |  beq ->fff_restv
+  |  // Handle numbers inline, unless a number base metatable is present.
+  |  ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])]
+  |   str BASE, L->base
+  |  checktp CARG2, LJ_TISNUM
+  |  // Only test the base metatable if the type check passed ('ls').
+  |  cmpls CARG4, #0
+  |   str PC, SAVE_PC			// Redundant (but a defined value).
+  |  // 'hi' here means: not a number, or a non-NULL number base metatable.
+  |  bhi ->fff_fallback
+  |  ffgccheck
+  |  mov CARG1, L
+  |  mov CARG2, BASE
+  |  bl extern lj_str_fromnumber	// (lua_State *L, cTValue *o)
+  |  // Returns GCstr *.
+  |  ldr BASE, L->base
+  |  mvn CARG2, #~LJ_TSTR
+  |  b ->fff_restv
+  |
+  |//-- Base library: iterators -------------------------------------------
+  |
+  |// next(t [, key]): arg 1 must be a table. lj_tab_next() operates on the
+  |// key slot at BASE+8; on success the key/value pair found at BASE+8 and
+  |// BASE+16 is copied down to the two result slots.
+  |.ffunc_1 next
+  |   mvn CARG4, #~LJ_TNIL
+  |  checktab CARG2, ->fff_fallback
+  |   strd CARG34, [BASE, NARGS8:RC]	// Set missing 2nd arg to nil.
+  |   ldr PC, [BASE, FRAME_PC]
+  |  mov CARG2, CARG1
+  |    str BASE, L->base		// Add frame since C call can throw.
+  |  mov CARG1, L
+  |    str BASE, L->top			// Dummy frame length is ok.
+  |  add CARG3, BASE, #8
+  |   str PC, SAVE_PC
+  |  bl extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
+  |  // Returns 0 at end of traversal.
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |  mvneq CRET2, #~LJ_TNIL
+  |  beq ->fff_restv			// End of traversal: return nil.
+  |  ldrd CARG12, [BASE, #8]		// Copy key and value to results.
+  |   ldrd CARG34, [BASE, #16]
+  |    mov RC, #(2+1)*8
+  |  strd CARG12, [BASE, #-8]
+  |   strd CARG34, [BASE]
+  |  b ->fff_res
+  |
+  |// pairs(t): returns three results: upvalue[0] of this function, the table
+  |// itself (left in place at BASE) and nil. With LJ_52 a table that has a
+  |// metatable is handed to the fallback handler instead.
+  |.ffunc_1 pairs
+  |  checktab CARG2, ->fff_fallback
+#if LJ_52
+  |  ldr TAB:RB, TAB:CARG1->metatable
+#endif
+  |   ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0]
+  |    ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+  |  cmp TAB:RB, #0
+  |  bne ->fff_fallback
+#endif
+  |  mvn CARG2, #~LJ_TNIL
+  |    mov RC, #(3+1)*8
+  |   strd CFUNC:CARG34, [BASE, #-8]
+  |  // Only the type word of the third result slot is written.
+  |  str CARG2, [BASE, #12]
+  |  b ->fff_res
+  |
+  |// ipairs_aux(t, i): iterator step for ipairs. Increments the integer
+  |// index and looks it up in the array part or, failing that, in the hash
+  |// part via lj_tab_getinth(). Returns (i+1, t[i+1]) if the value is not
+  |// nil and no results otherwise.
+  |.ffunc_2 ipairs_aux
+  |  checktp CARG2, LJ_TTAB
+  |  checktpeq CARG4, LJ_TISNUM
+  |  bne ->fff_fallback
+  |  ldr RB, TAB:CARG1->asize
+  |   ldr RC, TAB:CARG1->array
+  |  add CARG3, CARG3, #1
+  |    ldr PC, [BASE, FRAME_PC]
+  |  cmp CARG3, RB
+  |   add RC, RC, CARG3, lsl #3
+  |  // Store the incremented index (with its integer tag) as first result.
+  |  strd CARG34, [BASE, #-8]
+  |   ldrdlo CARG12, [RC]
+  |  // Default to zero results. Raised to two below for a non-nil value.
+  |   mov RC, #(0+1)*8
+  |  bhs >2				// Not in array part?
+  |1:
+  |   checktp CARG2, LJ_TNIL
+  |   movne RC, #(2+1)*8
+  |   strdne CARG12, [BASE]
+  |  b ->fff_res
+  |2:  // Check for empty hash part first. Otherwise call C function.
+  |  ldr RB, TAB:CARG1->hmask
+  |   mov CARG2, CARG3
+  |  cmp RB, #0
+  |  beq ->fff_res
+  |  .IOS mov RA, BASE
+  |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  .IOS mov BASE, RA
+  |  cmp CRET1, #0
+  |  beq ->fff_res
+  |  ldrd CARG12, [CRET1]
+  |  b <1
+  |
+  |// ipairs(t): returns three results: upvalue[0] of this function, the table
+  |// itself (left in place at BASE) and the integer 0 as initial index. With
+  |// LJ_52 a table that has a metatable is handed to the fallback handler.
+  |.ffunc_1 ipairs
+  |  checktab CARG2, ->fff_fallback
+#if LJ_52
+  |  ldr TAB:RB, TAB:CARG1->metatable
+#endif
+  |   ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0]
+  |    ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+  |  cmp TAB:RB, #0
+  |  bne ->fff_fallback
+#endif
+  |  // Build the initial index: value 0 with type tag LJ_TISNUM.
+  |  mov CARG1, #0
+  |  mvn CARG2, #~LJ_TISNUM
+  |    mov RC, #(3+1)*8
+  |   strd CFUNC:CARG34, [BASE, #-8]
+  |  strd CARG12, [BASE, #8]
+  |  b ->fff_res
+  |
+  |//-- Base library: catch errors ----------------------------------------
+  |
+  |// pcall(f, ...): needs at least one argument. Sets up a new frame that
+  |// starts one slot higher, drops the function from the argument count and
+  |// continues at vm_call_dispatch. PC gets the frame delta (8) plus the
+  |// frame type, which is FRAME_PCALLH if a hook is currently active.
+  |.ffunc pcall
+  |  ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
+  |   cmp NARGS8:RC, #8
+  |   blo ->fff_fallback
+  |  tst RA, #HOOK_ACTIVE		// Remember active hook before pcall.
+  |   mov RB, BASE
+  |   add BASE, BASE, #8
+  |  moveq PC, #8+FRAME_PCALL
+  |  movne PC, #8+FRAME_PCALLH
+  |   sub NARGS8:RC, NARGS8:RC, #8
+  |  b ->vm_call_dispatch
+  |
+  |// xpcall(f, err, ...): arg 2 must be a function. The first two stack slots
+  |// are swapped, the new frame starts two slots higher and the call
+  |// continues at vm_call_dispatch. PC gets the frame delta (16) plus the
+  |// frame type, which is FRAME_PCALLH if a hook is currently active.
+  |.ffunc_2 xpcall
+  |  ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
+  |  checkfunc CARG4, ->fff_fallback	// Traceback must be a function.
+  |   mov RB, BASE
+  |  strd CARG12, [BASE, #8]		// Swap function and traceback.
+  |   strd CARG34, [BASE]
+  |  tst RA, #HOOK_ACTIVE		// Remember active hook before pcall.
+  |   add BASE, BASE, #16
+  |  moveq PC, #16+FRAME_PCALL
+  |  movne PC, #16+FRAME_PCALLH
+  |   sub NARGS8:RC, NARGS8:RC, #16
+  |  b ->vm_call_dispatch
+  |
+  |//-- Coroutine library --------------------------------------------------
+  |
+  |// Shared implementation of coroutine.resume (resume = 1) and of the
+  |// function returned by coroutine.wrap (resume = 0).
+  |//
+  |// Outline: validate the coroutine state, copy the arguments to the
+  |// coroutine stack, run it via vm_resume and then either move the results
+  |// back to the caller's stack or handle the error.
+  |// Differences: resume takes the coroutine from arg 1 and prepends
+  |// true/false to the results. wrap takes the coroutine from upvalue[0] and
+  |// passes errors to lj_ffh_coroutine_wrap_err().
+  |.macro coroutine_resume_wrap, resume
+  |.if resume
+  |.ffunc_1 coroutine_resume
+  |  checktp CARG2, LJ_TTHREAD
+  |  bne ->fff_fallback
+  |.else
+  |.ffunc coroutine_wrap_aux
+  |  ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr
+  |.endif
+  |   ldr PC, [BASE, FRAME_PC]
+  |     str BASE, L->base
+  |  ldr CARG2, L:CARG1->top
+  |   ldrb RA, L:CARG1->status
+  |    ldr RB, L:CARG1->base
+  |  // CARG3 = co->top after pushing all arguments.
+  |  add CARG3, CARG2, NARGS8:RC
+  |  add CARG4, CARG2, RA
+  |   str PC, SAVE_PC
+  |  // Fallback if co->top + co->status == co->base.
+  |  cmp CARG4, RB
+  |  beq ->fff_fallback
+  |   ldr CARG4, L:CARG1->maxstack
+  |    ldr RB, L:CARG1->cframe
+  |  // Chained checks: fallback if status > LUA_YIELD, or the new top is
+  |  // above co->maxstack, or co->cframe is not NULL.
+  |   cmp RA, #LUA_YIELD
+  |   cmpls CARG3, CARG4
+  |    cmpls RB, #0
+  |    bhi ->fff_fallback
+  |  // RB (co->cframe) is zero here and is reused as the copy loop offset.
+  |1:
+  |.if resume
+  |  sub CARG3, CARG3, #8		// Keep resumed thread in stack for GC.
+  |  add BASE, BASE, #8
+  |  sub NARGS8:RC, NARGS8:RC, #8
+  |.endif
+  |  str CARG3, L:CARG1->top
+  |  str BASE, L->top
+  |2:  // Move args to coroutine.
+  |   ldrd CARG34, [BASE, RB]
+  |  cmp RB, NARGS8:RC
+  |   strdne CARG34, [CARG2, RB]
+  |  add RB, RB, #8
+  |  bne <2
+  |
+  |  mov CARG3, #0
+  |  // Keep the coroutine in RA across the call.
+  |   mov L:RA, L:CARG1
+  |  mov CARG4, #0
+  |  bl ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
+  |  // Returns thread status.
+  |4:
+  |  ldr CARG3, L:RA->base
+  |    mv_vmstate CARG2, INTERP
+  |  ldr CARG4, L:RA->top
+  |    st_vmstate CARG2
+  |   cmp CRET1, #LUA_YIELD
+  |  ldr BASE, L->base
+  |  // Status above LUA_YIELD: the coroutine returned with an error.
+  |   bhi >8
+  |  // RC = co->top - co->base = size of the results in bytes.
+  |  subs RC, CARG4, CARG3
+  |   ldr CARG1, L->maxstack
+  |   add CARG2, BASE, RC
+  |  beq >6				// No results?
+  |  cmp CARG2, CARG1
+  |   mov RB, #0
+  |  bhi >9				// Need to grow stack?
+  |
+  |  // CARG4 = offset of the last result, used as loop end marker.
+  |  sub CARG4, RC, #8
+  |   str CARG3, L:RA->top		// Clear coroutine stack.
+  |5:  // Move results from coroutine.
+  |   ldrd CARG12, [CARG3, RB]
+  |  cmp RB, CARG4
+  |   strd CARG12, [BASE, RB]
+  |  add RB, RB, #8
+  |  bne <5
+  |6:
+  |.if resume
+  |  mvn CARG3, #~LJ_TTRUE
+  |   add RC, RC, #16
+  |7:
+  |  str CARG3, [BASE, #-4]		// Prepend true/false to results.
+  |   sub RA, BASE, #8
+  |.else
+  |   mov RA, BASE
+  |   add RC, RC, #8
+  |.endif
+  |  // Return: BC_RET_Z if the frame type bits of PC are zero, else vm_return.
+  |  ands CARG1, PC, #FRAME_TYPE
+  |   str PC, SAVE_PC
+  |   str RC, SAVE_MULTRES
+  |  beq ->BC_RET_Z
+  |  b ->vm_return
+  |
+  |8:  // Coroutine returned with error (at co->top-1).
+  |.if resume
+  |  // Pre-indexed load: also moves CARG4 down to the error slot.
+  |  ldrd CARG12, [CARG4, #-8]!
+  |   mvn CARG3, #~LJ_TFALSE
+  |    mov RC, #(2+1)*8
+  |  str CARG4, L:RA->top		// Remove error from coroutine stack.
+  |  strd CARG12, [BASE]		// Copy error message.
+  |  b <7
+  |.else
+  |  mov CARG1, L
+  |  mov CARG2, L:RA
+  |  bl extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
+  |  // Never returns.
+  |.endif
+  |
+  |9:  // Handle stack expansion on return from yield.
+  |  mov CARG1, L
+  |  lsr CARG2, RC, #3
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  // Retry the result handling with a zero status.
+  |  mov CRET1, #0
+  |  b <4
+  |.endmacro
+  |
+  |  coroutine_resume_wrap 1		// coroutine.resume
+  |  coroutine_resume_wrap 0		// coroutine.wrap
+  |
+  |// coroutine.yield(...): if CFRAME_RESUME is set in L->cframe, saves
+  |// base/top, clears L->cframe, sets L->status to LUA_YIELD and leaves via
+  |// vm_leave_unw with LUA_YIELD in CRET1. Otherwise the fallback handler is
+  |// called.
+  |.ffunc coroutine_yield
+  |  ldr CARG1, L->cframe
+  |   add CARG2, BASE, NARGS8:RC
+  |   str BASE, L->base
+  |  tst CARG1, #CFRAME_RESUME
+  |   str CARG2, L->top
+  |    mov CRET1, #LUA_YIELD
+  |   mov CARG3, #0
+  |  beq ->fff_fallback
+  |   str CARG3, L->cframe
+  |    strb CRET1, L->status
+  |  b ->vm_leave_unw
+  |
+  |//-- Math library -------------------------------------------------------
+  |
+  |// math.floor / math.ceil (func = floor or ceil).
+  |//
+  |// An integer argument is returned unchanged, a non-number goes to the
+  |// fallback handler. An FP argument is rounded inline, working directly on
+  |// the two words of the double (CARG1 = lo, CARG2 = hi), and the result is
+  |// returned as an integer where the code can produce one. Large magnitudes
+  |// are passed to ->vm_floor_sf / ->vm_ceil_sf (defined outside this chunk).
+  |.macro math_round, func
+  |  .ffunc_1 math_ .. func
+  |  checktp CARG2, LJ_TISNUM
+  |  beq ->fff_restv
+  |  bhi ->fff_fallback
+  |  // Round FP value and normalize result.
+  |  lsl CARG3, CARG2, #1
+  |  adds RB, CARG3, #0x00200000
+  |  bpl >2				// |x| < 1?
+  |  mvn CARG4, #0x3e0
+  |    subs RB, CARG4, RB, asr #21
+  |  // CARG4 = upper 32 bits of the mantissa with the implicit one set,
+  |  // CARG3 = remaining low mantissa bits.
+  |  lsl CARG4, CARG2, #11
+  |   lsl CARG3, CARG1, #11
+  |  orr CARG4, CARG4, #0x80000000
+  |   rsb INS, RB, #32
+  |  orr CARG4, CARG4, CARG1, lsr #21
+  |    bls >3				// |x| >= 2^31?
+  |  // CARG3 = all bits shifted out (the fraction), CARG1 = integer part.
+  |   orr CARG3, CARG3, CARG4, lsl INS
+  |  lsr CARG1, CARG4, RB
+  |.if "func" == "floor"
+  |  // floor: add one to the magnitude if x is negative with a fraction.
+  |   tst CARG3, CARG2, asr #31
+  |   addne CARG1, CARG1, #1
+  |.else
+  |  // ceil: add one to the magnitude if x is positive with a fraction.
+  |  // If that add overflows, 2^31 is returned as a double instead.
+  |   bics CARG3, CARG3, CARG2, asr #31
+  |   addsne CARG1, CARG1, #1
+  |   ldrdvs CARG12, >9
+  |   bvs ->fff_restv
+  |.endif
+  |  // Apply the sign of x to the magnitude.
+  |    cmp CARG2, #0
+  |    rsblt CARG1, CARG1, #0
+  |1:
+  |   mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |2:  // |x| < 1
+  |  bcs ->fff_restv			// |x| is not finite.
+  |  orr CARG3, CARG3, CARG1		// ztest = abs(hi) | lo
+  |.if "func" == "floor"
+  |  tst CARG3, CARG2, asr #31		// return (ztest & sign) == 0 ? 0 : -1
+  |  moveq CARG1, #0
+  |  mvnne CARG1, #0
+  |.else
+  |  bics CARG3, CARG3, CARG2, asr #31	// return (ztest & ~sign) == 0 ? 0 : 1
+  |  moveq CARG1, #0
+  |  movne CARG1, #1
+  |.endif
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |3:  // |x| >= 2^31. Check for x == -(2^31).
+  |  cmpeq CARG4, #0x80000000
+  |.if "func" == "floor"
+  |  cmpeq CARG3, #0
+  |.endif
+  |  bne >4
+  |  cmp CARG2, #0
+  |  movmi CARG1, #0x80000000
+  |  bmi <1
+  |4:
+  |  // Everything else is rounded by the out-of-line helper.
+  |  bl ->vm_..func.._sf
+  |  b ->fff_restv
+  |.endmacro
+  |
+  |  math_round floor
+  |  math_round ceil
+  |
+  |// Literal for the overflow cases: loaded via 'ldrdvs CARG12, >9' above and
+  |// via 'ldrdvs CARG12, <9' in math_abs.
+  |.align 8
+  |9:
+  |  .long 0x00000000, 0x41e00000	// 2^31.
+  |
+  |// math.abs(x): FP numbers get their sign bit cleared, negative integers
+  |// are negated. If the negation overflows, the 2^31 literal at label 9 is
+  |// returned as a double. Falls through into fff_restv.
+  |.ffunc_1 math_abs
+  |  checktp CARG2, LJ_TISNUM
+  |  bhi ->fff_fallback
+  |  // 'ne' after the check: FP number. 'eq': integer, handled below.
+  |  bicne CARG2, CARG2, #0x80000000
+  |  bne ->fff_restv
+  |  cmp CARG1, #0
+  |  rsbslt CARG1, CARG1, #0
+  |  ldrdvs CARG12, <9
+  |  // Fallthrough.
+  |
+  |// Common return paths for fast functions. Three entry points with
+  |// increasing preconditions:
+  |//   fff_restv: single result in CARG1/CARG2, PC not yet loaded.
+  |//   fff_res1:  single result already stored at BASE-8, PC loaded.
+  |//   fff_res:   results stored from BASE-8 on, RC and PC set.
+  |// If the frame type bits of PC are non-zero the return is completed by
+  |// vm_return. Otherwise the calling instruction is decoded here: missing
+  |// results are filled with nil, BASE is adjusted and the next instruction
+  |// is dispatched.
+  |->fff_restv:
+  |  // CARG12 = TValue result.
+  |  ldr PC, [BASE, FRAME_PC]
+  |  strd CARG12, [BASE, #-8]
+  |->fff_res1:
+  |  // PC = return.
+  |  mov RC, #(1+1)*8
+  |->fff_res:
+  |  // RC = (nresults+1)*8, PC = return.
+  |  ands CARG1, PC, #FRAME_TYPE
+  |  // Fetch the calling instruction (only if the frame type bits are zero).
+  |  ldreq INS, [PC, #-4]
+  |   str RC, SAVE_MULTRES
+  |  sub RA, BASE, #8
+  |  bne ->vm_return
+  |  decode_RB8 RB, INS
+  |5:
+  |  cmp RB, RC				// More results expected?
+  |  bhi >6
+  |  decode_RA8 CARG1, INS
+  |   ins_next1
+  |   ins_next2
+  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
+  |  sub BASE, RA, CARG1
+  |   ins_next3
+  |
+  |6:  // Fill up results with nil.
+  |  add CARG2, RA, RC
+  |  mvn CARG1, #~LJ_TNIL
+  |   add RC, RC, #8
+  |  // Only the type word of the next result slot is written.
+  |  str CARG1, [CARG2, #-4]
+  |  b <5
+  |
+  |// Defines the fast function math_<func> as a call to the external
+  |// one-argument function 'func'. With HFABI the .ffunc_d entry macro is
+  |// used and the result is returned via fff_resd, otherwise .ffunc_n and
+  |// fff_restv. On iOS BASE is preserved in RA across the call.
+  |.macro math_extern, func
+  |.if HFABI
+  |  .ffunc_d math_ .. func
+  |.else
+  |  .ffunc_n math_ .. func
+  |.endif
+  |  .IOS mov RA, BASE
+  |  bl extern func
+  |  .IOS mov BASE, RA
+  |.if HFABI
+  |  b ->fff_resd
+  |.else
+  |  b ->fff_restv
+  |.endif
+  |.endmacro
+  |
+  |// Two-argument variant of math_extern: defines math_<func> as a call to
+  |// the external function 'func', using the .ffunc_dd (HFABI) or .ffunc_nn
+  |// entry macro. On iOS BASE is preserved in RA across the call.
+  |.macro math_extern2, func
+  |.if HFABI
+  |  .ffunc_dd math_ .. func
+  |.else
+  |  .ffunc_nn math_ .. func
+  |.endif
+  |  .IOS mov RA, BASE
+  |  bl extern func
+  |  .IOS mov BASE, RA
+  |.if HFABI
+  |  b ->fff_resd
+  |.else
+  |  b ->fff_restv
+  |.endif
+  |.endmacro
+  |
+  |// math.sqrt: a single VFP instruction if an FPU is available, otherwise a
+  |// call to the external sqrt(). The FPU variant also provides fff_resd,
+  |// the return path for a single FP result in d0.
+  |.if FPU
+  |  .ffunc_d math_sqrt
+  |  vsqrt.f64 d0, d0
+  |->fff_resd:
+  |  // d0 = FP result.
+  |  ldr PC, [BASE, FRAME_PC]
+  |  vstr d0, [BASE, #-8]
+  |  b ->fff_res1
+  |.else
+  |  math_extern sqrt
+  |.endif
+  |
+  |// math.log(x): handled inline only for exactly one argument whose type
+  |// tag compares 'lo' against LJ_TISNUM. Calls the external log().
+  |.ffunc math_log
+  |.if HFABI
+  |  // Load the type word for the check and the value into d0 for the call.
+  |  ldr CARG2, [BASE, #4]
+  |   cmp NARGS8:RC, #8			// Need exactly 1 argument.
+  |  vldr d0, [BASE]
+  |   bne ->fff_fallback
+  |.else
+  |  ldrd CARG12, [BASE]
+  |   cmp NARGS8:RC, #8			// Need exactly 1 argument.
+  |   bne ->fff_fallback
+  |.endif
+  |  checktp CARG2, LJ_TISNUM
+  |  bhs ->fff_fallback
+  |  .IOS mov RA, BASE
+  |  bl extern log
+  |  .IOS mov BASE, RA
+  |.if HFABI
+  |  b ->fff_resd
+  |.else
+  |  b ->fff_restv
+  |.endif
+  |
+  |  // The remaining math functions are plain calls to the C library.
+  |  math_extern log10
+  |  math_extern exp
+  |  math_extern sin
+  |  math_extern cos
+  |  math_extern tan
+  |  math_extern asin
+  |  math_extern acos
+  |  math_extern atan
+  |  math_extern sinh
+  |  math_extern cosh
+  |  math_extern tanh
+  |  math_extern2 pow
+  |  math_extern2 atan2
+  |  math_extern2 fmod
+  |
+  |// math.deg and math.rad share one implementation: the argument is
+  |// multiplied by upvalue[0] of the called function.
+  |->ff_math_deg:
+  |.if FPU
+  |  .ffunc_d math_rad
+  |  vldr d1, CFUNC:CARG3->upvalue[0]
+  |  vmul.f64 d0, d0, d1
+  |  b ->fff_resd
+  |.else
+  |  .ffunc_n math_rad
+  |  ldrd CARG34, CFUNC:CARG3->upvalue[0]
+  |  bl extern __aeabi_dmul
+  |  b ->fff_restv
+  |.endif
+  |
+  |// math.ldexp(x, exp): arg 1 must be an FP number (type tag 'lo' against
+  |// LJ_TISNUM), arg 2 an integer. Calls the external ldexp().
+  |// HFABI: x is passed in d0 and exp in CARG1. Otherwise x is in
+  |// CARG1/CARG2 and exp in CARG3.
+  |.if HFABI
+  |  .ffunc math_ldexp
+  |  ldr CARG4, [BASE, #4]
+  |  ldrd CARG12, [BASE, #8]
+  |   cmp NARGS8:RC, #16
+  |   blo ->fff_fallback
+  |  vldr d0, [BASE]
+  |  checktp CARG4, LJ_TISNUM
+  |  bhs ->fff_fallback
+  |  checktp CARG2, LJ_TISNUM
+  |  bne ->fff_fallback
+  |  .IOS mov RA, BASE
+  |  bl extern ldexp			// (double x, int exp)
+  |  .IOS mov BASE, RA
+  |  b ->fff_resd
+  |.else
+  |.ffunc_2 math_ldexp
+  |  checktp CARG2, LJ_TISNUM
+  |  bhs ->fff_fallback
+  |  checktp CARG4, LJ_TISNUM
+  |  bne ->fff_fallback
+  |  .IOS mov RA, BASE
+  |  bl extern ldexp			// (double x, int exp)
+  |  .IOS mov BASE, RA
+  |  b ->fff_restv
+  |.endif
+  |
+  |// math.frexp(x): calls the external frexp() with a pointer to the word at
+  |// [sp] as output argument. Returns two results: the FP return value of
+  |// frexp() and the word stored at [sp], tagged as an integer.
+  |.if HFABI
+  |.ffunc_d math_frexp
+  |  mov CARG1, sp
+  |  .IOS mov RA, BASE
+  |  bl extern frexp
+  |  .IOS mov BASE, RA
+  |   ldr CARG3, [sp]
+  |   mvn CARG4, #~LJ_TISNUM
+  |    ldr PC, [BASE, FRAME_PC]
+  |  vstr d0, [BASE, #-8]
+  |    mov RC, #(2+1)*8
+  |   strd CARG34, [BASE]
+  |  b ->fff_res
+  |.else
+  |.ffunc_n math_frexp
+  |  // The double occupies CARG1/CARG2, so the pointer goes into CARG3.
+  |  mov CARG3, sp
+  |  .IOS mov RA, BASE
+  |  bl extern frexp
+  |  .IOS mov BASE, RA
+  |   ldr CARG3, [sp]
+  |   mvn CARG4, #~LJ_TISNUM
+  |    ldr PC, [BASE, FRAME_PC]
+  |  strd CARG12, [BASE, #-8]
+  |    mov RC, #(2+1)*8
+  |   strd CARG34, [BASE]
+  |  b ->fff_res
+  |.endif
+  |
+  |// math.modf(x): calls the external modf() with BASE-8 (the first result
+  |// slot) as output pointer, so the C function writes the first result
+  |// directly. Its return value is stored at BASE as second result.
+  |.if HFABI
+  |.ffunc_d math_modf
+  |  sub CARG1, BASE, #8
+  |   ldr PC, [BASE, FRAME_PC]
+  |  .IOS mov RA, BASE
+  |  bl extern modf
+  |  .IOS mov BASE, RA
+  |   mov RC, #(2+1)*8
+  |  vstr d0, [BASE]
+  |  b ->fff_res
+  |.else
+  |.ffunc_n math_modf
+  |  // The double occupies CARG1/CARG2, so the pointer goes into CARG3.
+  |  sub CARG3, BASE, #8
+  |   ldr PC, [BASE, FRAME_PC]
+  |  .IOS mov RA, BASE
+  |  bl extern modf
+  |  .IOS mov BASE, RA
+  |   mov RC, #(2+1)*8
+  |  strd CARG12, [BASE]
+  |  b ->fff_res
+  |.endif
+  |
+  |// math.min / math.max over a variable number of arguments.
+  |//   name:  fast function name.
+  |//   cond:  condition for replacing the current integer result after
+  |//          'cmp result, arg'.
+  |//   fcond: condition for replacing the current FP result after the FP
+  |//          comparison of result and arg.
+  |// The loop starts in integer mode (label 1) if arg 1 is an integer and
+  |// stays there as long as all arguments are integers. The first FP number
+  |// switches it to FP mode (label 5), converting the intermediate result.
+  |// In FP mode integer arguments are converted before the comparison.
+  |// Any non-number argument leads to the fallback handler.
+  |.macro math_minmax, name, cond, fcond
+  |.if FPU
+  |  // FPU variant: RA = pointer to the current argument, RB = end pointer.
+  |  .ffunc_1 name
+  |   add RB, BASE, RC
+  |  checktp CARG2, LJ_TISNUM
+  |   add RA, BASE, #8
+  |  bne >4
+  |1:  // Handle integers.
+  |  ldrd CARG34, [RA]
+  |   cmp RA, RB
+  |   bhs ->fff_restv
+  |  checktp CARG4, LJ_TISNUM
+  |  bne >3
+  |  cmp CARG1, CARG3
+  |   add RA, RA, #8
+  |  mov..cond CARG1, CARG3
+  |  b <1
+  |3:  // Convert intermediate result to number and continue below.
+  |  vmov s4, CARG1
+  |  // Flags are still those of the type check above.
+  |  bhi ->fff_fallback
+  |  vldr d1, [RA]
+  |  vcvt.f64.s32 d0, s4
+  |  b >6
+  |
+  |4:
+  |  vldr d0, [BASE]
+  |  // Flags are still those of the type check on arg 1.
+  |  bhi ->fff_fallback
+  |5:  // Handle numbers.
+  |  ldrd CARG34, [RA]
+  |  vldr d1, [RA]
+  |   cmp RA, RB
+  |   bhs ->fff_resd
+  |  checktp CARG4, LJ_TISNUM
+  |  bhs >7
+  |6:
+  |  vcmp.f64 d0, d1
+  |  vmrs
+  |   add RA, RA, #8
+  |  vmov..fcond.f64 d0, d1
+  |  b <5
+  |7:  // Convert integer to number and continue above.
+  |  vmov s4, CARG3
+  |  bhi ->fff_fallback
+  |  vcvt.f64.s32 d1, s4
+  |  b <6
+  |
+  |.else
+  |
+  |  // Soft-float variant: RA = byte offset of the current argument, the
+  |  // result is kept in CARG1/CARG2 and compared against CARG3/CARG4.
+  |  .ffunc_1 name
+  |  checktp CARG2, LJ_TISNUM
+  |   mov RA, #8
+  |  bne >4
+  |1:  // Handle integers.
+  |  ldrd CARG34, [BASE, RA]
+  |   cmp RA, RC
+  |   bhs ->fff_restv
+  |  checktp CARG4, LJ_TISNUM
+  |  bne >3
+  |  cmp CARG1, CARG3
+  |   add RA, RA, #8
+  |  mov..cond CARG1, CARG3
+  |  b <1
+  |3:  // Convert intermediate result to number and continue below.
+  |  bhi ->fff_fallback
+  |  bl extern __aeabi_i2d
+  |  // Reload the current argument after the call.
+  |  ldrd CARG34, [BASE, RA]
+  |  b >6
+  |
+  |4:
+  |  bhi ->fff_fallback
+  |5:  // Handle numbers.
+  |  ldrd CARG34, [BASE, RA]
+  |   cmp RA, RC
+  |   bhs ->fff_restv
+  |  checktp CARG4, LJ_TISNUM
+  |  bhs >7
+  |6:
+  |  // The comparison result is returned in the flags. The code relies on
+  |  // CARG1-CARG4 being intact after the call.
+  |  bl extern __aeabi_cdcmple
+  |   add RA, RA, #8
+  |  mov..fcond CARG1, CARG3
+  |  mov..fcond CARG2, CARG4
+  |  b <5
+  |7:  // Convert integer to number and continue above.
+  |  bhi ->fff_fallback
+  |  // Save the current result in TMPD across the conversion call.
+  |  strd CARG12, TMPD
+  |  mov CARG1, CARG3
+  |  bl extern __aeabi_i2d
+  |  // After this the two operands are swapped: CARG12 = converted argument,
+  |  // CARG34 = previous result.
+  |  ldrd CARG34, TMPD
+  |  b <6
+  |.endif
+  |.endmacro
+  |
+  |  math_minmax math_min, gt, hi
+  |  math_minmax math_max, lt, lo
+  |
+  |//-- String library -----------------------------------------------------
+  |
+  |// string.len(s): returns the len field of the string as an integer.
+  |.ffunc_1 string_len
+  |  checkstr CARG2, ->fff_fallback
+  |  ldr CARG1, STR:CARG1->len
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |// string.byte(s): returns the first byte of the string as an integer, or
+  |// no results for an empty string.
+  |.ffunc string_byte			// Only handle the 1-arg case here.
+  |  ldrd CARG12, [BASE]
+  |    ldr PC, [BASE, FRAME_PC]
+  |   cmp NARGS8:RC, #8
+  |   checktpeq CARG2, LJ_TSTR		// Need exactly 1 argument.
+  |   bne ->fff_fallback
+  |  ldr CARG3, STR:CARG1->len
+  |   ldrb CARG1, STR:CARG1[1]		// Access is always ok (NUL at end).
+  |   mvn CARG2, #~LJ_TISNUM
+  |  // The result count depends on the length: none for an empty string.
+  |  cmp CARG3, #0
+  |  moveq RC, #(0+1)*8
+  |  movne RC, #(1+1)*8
+  |   strd CARG12, [BASE, #-8]
+  |  b ->fff_res
+  |
+  |// string.char(c): handles a single integer argument in the range 0..255.
+  |// The value is stored to the stack temporary TMPD and turned into a
+  |// one-character string.
+  |// Also provides fff_newstr, the common tail used by the string functions
+  |// to create a string from a buffer and return it.
+  |.ffunc string_char			// Only handle the 1-arg case here.
+  |  ffgccheck
+  |  ldrd CARG12, [BASE]
+  |    ldr PC, [BASE, FRAME_PC]
+  |   cmp NARGS8:RC, #8			// Need exactly 1 argument.
+  |   checktpeq CARG2, LJ_TISNUM
+  |  // Any bit set above the low 8 bits leaves the flags at 'ne'.
+  |   bicseq CARG4, CARG1, #255
+  |  mov CARG3, #1
+  |   bne ->fff_fallback
+  |  str CARG1, TMPD
+  |  mov CARG2, TMPDp			// Points to stack. Little-endian.
+  |->fff_newstr:
+  |  // CARG2 = str, CARG3 = len.
+  |   str BASE, L->base
+  |  mov CARG1, L
+  |   str PC, SAVE_PC
+  |  bl extern lj_str_new		// (lua_State *L, char *str, size_t l)
+  |  // Returns GCstr *.
+  |  ldr BASE, L->base
+  |   mvn CARG2, #~LJ_TSTR
+  |  b ->fff_restv
+  |
+  |// string.sub(s, start [, end]): needs a string and an integer start. The
+  |// end defaults to -1 if only two arguments are given and must be an
+  |// integer otherwise. Positions are normalized as described in the inline
+  |// comments. The substring is created via fff_newstr. An empty range
+  |// returns the empty string via fff_emptystr.
+  |.ffunc string_sub
+  |  ffgccheck
+  |  ldrd CARG12, [BASE]
+  |   ldrd CARG34, [BASE, #16]
+  |    cmp NARGS8:RC, #16
+  |  // Default end = -1, used for the two-argument case.
+  |     mvn RB, #0
+  |    beq >1
+  |    blo ->fff_fallback
+  |   checktp CARG4, LJ_TISNUM
+  |    mov RB, CARG3
+  |   bne ->fff_fallback
+  |1:
+  |  ldrd CARG34, [BASE, #8]
+  |  checktp CARG2, LJ_TSTR
+  |   ldreq CARG2, STR:CARG1->len
+  |  checktpeq CARG4, LJ_TISNUM
+  |  bne ->fff_fallback
+  |  // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end
+  |  add CARG4, CARG2, #1
+  |  cmp CARG3, #0			// if (start < 0) start += len+1
+  |  addlt CARG3, CARG3, CARG4
+  |  cmp CARG3, #1			// if (start < 1) start = 1
+  |  movlt CARG3, #1
+  |  cmp RB, #0				// if (end < 0) end += len+1
+  |  addlt RB, RB, CARG4
+  |  bic RB, RB, RB, asr #31		// if (end < 0) end = 0
+  |  cmp RB, CARG2			// if (end > len) end = len
+  |  // CARG1 is biased by -1, so adding the 1-based start gives the address
+  |  // of the first character of the substring.
+  |   add CARG1, STR:CARG1, #sizeof(GCstr)-1
+  |  movgt RB, CARG2
+  |   add CARG2, CARG1, CARG3
+  |  subs CARG3, RB, CARG3		// len = end - start
+  |   add CARG3, CARG3, #1		// len += 1
+  |  // The branch uses the flags of the subs: taken if end >= start.
+  |  bge ->fff_newstr
+  |->fff_emptystr:
+  |  // Return the empty string stored in the global state.
+  |  sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty)
+  |  mvn CARG2, #~LJ_TSTR
+  |  b ->fff_restv
+  |
+  |// string.rep(s, n): inline only for exactly two arguments (string and
+  |// integer) and a string of length 1. The character is replicated n times
+  |// into the global temporary buffer, provided the buffer is large enough.
+  |// A count <= 0 or an empty string returns the empty string.
+  |.ffunc string_rep			// Only handle the 1-char case inline.
+  |  ffgccheck
+  |  ldrd CARG12, [BASE]
+  |   ldrd CARG34, [BASE, #8]
+  |    cmp NARGS8:RC, #16
+  |    bne ->fff_fallback		// Exactly 2 arguments
+  |  checktp CARG2, LJ_TSTR
+  |   checktpeq CARG4, LJ_TISNUM
+  |   bne ->fff_fallback
+  |  // CARG4 = n-1 is the index of the last byte to fill.
+  |  subs CARG4, CARG3, #1
+  |   ldr CARG2, STR:CARG1->len
+  |  blt ->fff_emptystr			// Count <= 0?
+  |   cmp CARG2, #1
+  |   blo ->fff_emptystr		// Zero-length string?
+  |   bne ->fff_fallback		// Fallback for > 1-char strings.
+  |  ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
+  |   ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
+  |  // Word load, but only the low byte is stored by the loop below.
+  |   ldr CARG1, STR:CARG1[1]
+  |  cmp RB, CARG3
+  |  blo ->fff_fallback
+  |1:  // Fill buffer with char.
+  |   strb CARG1, [CARG2, CARG4]
+  |  subs CARG4, CARG4, #1
+  |  bge <1
+  |  // CARG2 = buffer, CARG3 = n.
+  |  b ->fff_newstr
+  |
+  |// string.reverse(s): copies the string back to front into the global
+  |// temporary buffer (fallback if the buffer is too small) and creates the
+  |// result via fff_newstr.
+  |.ffunc string_reverse
+  |  ffgccheck
+  |  ldrd CARG12, [BASE]
+  |   cmp NARGS8:RC, #8
+  |   blo ->fff_fallback
+  |  checkstr CARG2, ->fff_fallback
+  |  ldr CARG3, STR:CARG1->len
+  |   ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
+  |    ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
+  |  // CARG4 = write index, counts down from len.
+  |  mov CARG4, CARG3
+  |  add CARG1, STR:CARG1, #sizeof(GCstr)
+  |   cmp RB, CARG3
+  |   blo ->fff_fallback
+  |1:  // Reverse string copy.
+  |  // The source byte is loaded before the loop exit check.
+  |  ldrb RB, [CARG1], #1
+  |   subs CARG4, CARG4, #1
+  |   blt ->fff_newstr
+  |  strb RB, [CARG2, CARG4]
+  |  b <1
+  |
+  |// ASCII case conversion for string.lower / string.upper.
+  |//   name: fast function name.
+  |//   lo:   first character code of the 26-character range to convert.
+  |// Each byte in the range lo..lo+25 has bit 0x20 flipped, all other bytes
+  |// are copied unchanged. The result is built in the global temporary
+  |// buffer (fallback if it is too small) and created via fff_newstr.
+  |.macro ffstring_case, name, lo
+  |  .ffunc name
+  |  ffgccheck
+  |  ldrd CARG12, [BASE]
+  |   cmp NARGS8:RC, #8
+  |   blo ->fff_fallback
+  |  checkstr CARG2, ->fff_fallback
+  |  ldr CARG3, STR:CARG1->len
+  |   ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
+  |    ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
+  |  mov CARG4, #0
+  |  add CARG1, STR:CARG1, #sizeof(GCstr)
+  |   cmp RB, CARG3
+  |   blo ->fff_fallback
+  |1:  // ASCII case conversion.
+  |  // The byte is loaded before the loop exit check.
+  |  ldrb RB, [CARG1, CARG4]
+  |   cmp CARG4, CARG3
+  |   bhs ->fff_newstr
+  |  // Unsigned range check: (byte - lo) < 26.
+  |  sub RC, RB, #lo
+  |  cmp RC, #26
+  |  eorlo RB, RB, #0x20
+  |  strb RB, [CARG2, CARG4]
+  |   add CARG4, CARG4, #1
+  |  b <1
+  |.endmacro
+  |
+  |ffstring_case string_lower, 65
+  |ffstring_case string_upper, 97
+  |
+  |//-- Table library ------------------------------------------------------
+  |
+  |// table.getn(t): returns the result of lj_tab_len() as an integer.
+  |.ffunc_1 table_getn
+  |  checktab CARG2, ->fff_fallback
+  |  .IOS mov RA, BASE
+  |  bl extern lj_tab_len		// (GCtab *t)
+  |  // Returns uint32_t (but less than 2^31).
+  |  .IOS mov BASE, RA
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |//-- Bit library --------------------------------------------------------
+  |
+  |// FP number to bit conversion for soft-float. Clobbers r0-r3.
+  |// In:  CARG1/CARG2 = lo/hi word of the FP number.
+  |// Out: CARG1 = 32 bit integer result. Returns with bx lr.
+  |// RB is used as a temporary, too.
+  |// The vm_tobit_fb entry point is called right after a type check and goes
+  |// to the fallback handler if the flags are 'hi'.
+  |->vm_tobit_fb:
+  |  bhi ->fff_fallback
+  |->vm_tobit:
+  |  lsl RB, CARG2, #1
+  |  adds RB, RB, #0x00200000
+  |  movpl CARG1, #0			// |x| < 1?
+  |  bxpl lr
+  |  mvn CARG4, #0x3e0
+  |  subs RB, CARG4, RB, asr #21
+  |  bmi >1				// |x| >= 2^32?
+  |  // Build the top 32 mantissa bits (implicit one set), shift them down to
+  |  // get the integer magnitude and negate it if x is negative.
+  |  lsl CARG4, CARG2, #11
+  |  orr CARG4, CARG4, #0x80000000
+  |  orr CARG4, CARG4, CARG1, lsr #21
+  |   cmp CARG2, #0
+  |  lsr CARG1, CARG4, RB
+  |   rsblt CARG1, CARG1, #0
+  |  bx lr
+  |1:
+  |  // Large magnitude: combine bits from both mantissa words (the implicit
+  |  // one is not added here) and negate the result if x is negative.
+  |  add RB, RB, #21
+  |  lsr CARG4, CARG1, RB
+  |  rsb RB, RB, #20
+  |  lsl CARG1, CARG2, #12
+  |   cmp CARG2, #0
+  |  orr CARG1, CARG4, CARG1, lsl RB
+  |   rsblt CARG1, CARG1, #0
+  |  bx lr
+  |
+  |// Entry sequence for bit.* functions with one leading numeric argument:
+  |// after this macro CARG1 holds the argument as a 32 bit integer. An
+  |// integer argument is used as-is, anything else is passed through
+  |// vm_tobit_fb, which converts FP numbers and falls back for non-numbers.
+  |.macro .ffunc_bit, name
+  |  .ffunc_1 bit_..name
+  |  checktp CARG2, LJ_TISNUM
+  |  blne ->vm_tobit_fb
+  |.endmacro
+  |
+  |// bit.tobit(x): returns the converted argument as an integer.
+  |.ffunc_bit tobit
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |// Variadic bitwise operations (bit.band, bit.bor, bit.bxor).
+  |//   name: suffix of the fast function name.
+  |//   ins:  ARM instruction that combines accumulator and next argument.
+  |// CARG3 is the accumulator, RA the byte offset of the next argument. Each
+  |// further argument is converted like the first one. All three expansions
+  |// leave the loop via the shared tail at label 2 below.
+  |.macro .ffunc_bit_op, name, ins
+  |  .ffunc_bit name
+  |  mov CARG3, CARG1
+  |  mov RA, #8
+  |1:
+  |  ldrd CARG12, [BASE, RA]
+  |   cmp RA, NARGS8:RC
+  |    add RA, RA, #8
+  |   bge >2
+  |  checktp CARG2, LJ_TISNUM
+  |  blne ->vm_tobit_fb
+  |  ins CARG3, CARG3, CARG1
+  |  b <1
+  |.endmacro
+  |
+  |.ffunc_bit_op band, and
+  |.ffunc_bit_op bor, orr
+  |.ffunc_bit_op bxor, eor
+  |
+  |2:
+  |  // Shared tail: return the accumulator in CARG3 as an integer.
+  |  mvn CARG4, #~LJ_TISNUM
+  |   ldr PC, [BASE, FRAME_PC]
+  |  strd CARG34, [BASE, #-8]
+  |  b ->fff_res1
+  |
+  |// bit.bswap(x): returns the argument with its four bytes reversed, using
+  |// eor/bic/ror instead of a dedicated byte-reverse instruction.
+  |.ffunc_bit bswap
+  |  eor CARG3, CARG1, CARG1, ror #16
+  |  bic CARG3, CARG3, #0x00ff0000
+  |  ror CARG1, CARG1, #8
+  |   mvn CARG2, #~LJ_TISNUM
+  |  eor CARG1, CARG1, CARG3, lsr #8
+  |  b ->fff_restv
+  |
+  |// bit.bnot(x): returns the bitwise complement of the argument.
+  |.ffunc_bit bnot
+  |  mvn CARG1, CARG1
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |// Shift and rotate operations: bit.<name>(x, n).
+  |//   name:  suffix of the fast function name.
+  |//   ins:   ARM shift/rotate instruction to apply.
+  |//   shmod: 0 = use the shift count masked to 0..31,
+  |//          other = use the negated shift count (rol is done with ror).
+  |// The shift count (arg 2) is converted first and kept in RA, then the
+  |// value (arg 1) is converted and shifted.
+  |.macro .ffunc_bit_sh, name, ins, shmod
+  |  .ffunc bit_..name
+  |  ldrd CARG12, [BASE, #8]
+  |   cmp NARGS8:RC, #16
+  |   blo ->fff_fallback
+  |  checktp CARG2, LJ_TISNUM
+  |  blne ->vm_tobit_fb
+  |.if shmod == 0
+  |  and RA, CARG1, #31
+  |.else
+  |  rsb RA, CARG1, #0
+  |.endif
+  |  ldrd CARG12, [BASE]
+  |  checktp CARG2, LJ_TISNUM
+  |  blne ->vm_tobit_fb
+  |  ins CARG1, CARG1, RA
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |.endmacro
+  |
+  |.ffunc_bit_sh lshift, lsl, 0
+  |.ffunc_bit_sh rshift, lsr, 0
+  |.ffunc_bit_sh arshift, asr, 0
+  |.ffunc_bit_sh rol, ror, 1
+  |.ffunc_bit_sh ror, ror, 0
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |// Generic fallback for all fast functions: calls the C function stored in
+  |// the 'f' field of the called function object. The stack is grown first
+  |// if fewer than LUA_MINSTACK slots are free above the arguments.
+  |// The return value of the C function selects how to continue:
+  |//   > 0: nresults+1, return the results via fff_res.
+  |//   = 0: retry the fast path via ins_callt.
+  |//   < 0: continue at vm_call_tail.
+  |->fff_fallback:			// Call fast function fallback handler.
+  |  // BASE = new base, RC = nargs*8
+  |   ldr CARG3, [BASE, FRAME_FUNC]
+  |  ldr CARG2, L->maxstack
+  |  add CARG1, BASE, NARGS8:RC
+  |    ldr PC, [BASE, FRAME_PC]		// Fallback may overwrite PC.
+  |  str CARG1, L->top
+  |   ldr CARG3, CFUNC:CARG3->f
+  |    str BASE, L->base
+  |  add CARG1, CARG1, #8*LUA_MINSTACK
+  |    str PC, SAVE_PC			// Redundant (but a defined value).
+  |  cmp CARG1, CARG2
+  |   mov CARG1, L
+  |  bhi >5				// Need to grow stack.
+  |   blx CARG3				// (lua_State *L)
+  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+  |   ldr BASE, L->base
+  |  cmp CRET1, #0
+  |   lsl RC, CRET1, #3
+  |   sub RA, BASE, #8
+  |  bgt ->fff_res			// Returned nresults+1?
+  |1:  // Returned 0 or -1: retry fast path.
+  |  // The argument count is recomputed from L->top.
+  |   ldr CARG1, L->top
+  |    ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+  |   sub NARGS8:RC, CARG1, BASE
+  |  bne ->vm_call_tail			// Returned -1?
+  |  ins_callt				// Returned 0: retry fast path.
+  |
+  |// Reconstruct previous base for vmeta_call during tailcall.
+  |->vm_call_tail:
+  |  // The frame delta comes from PC itself if its frame type bits are
+  |  // non-zero, else from operand A of the calling instruction (plus 8).
+  |  ands CARG1, PC, #FRAME_TYPE
+  |   bic CARG2, PC, #FRAME_TYPEP
+  |  ldreq INS, [PC, #-4]
+  |  andeq CARG2, MASKR8, INS, lsr #5	// Conditional decode_RA8.
+  |  addeq CARG2, CARG2, #8
+  |  sub RB, BASE, CARG2
+  |  b ->vm_call_dispatch		// Resolve again for tailcall.
+  |
+  |5:  // Grow stack for fallback handler.
+  |  mov CARG2, #LUA_MINSTACK
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldr BASE, L->base
+  |  cmp CARG1, CARG1			// Set zero-flag to force retry.
+  |  b <1
+  |
+  |// Runs one GC step from within a fast function. Called with bl; the
+  |// return address is kept in RA across the C call. BASE and CFUNC:CARG3
+  |// are reloaded before returning.
+  |->fff_gcstep:			// Call GC step function.
+  |  // BASE = new base, RC = nargs*8
+  |  mov RA, lr
+  |   str BASE, L->base
+  |  add CARG2, BASE, NARGS8:RC
+  |   str PC, SAVE_PC			// Redundant (but a defined value).
+  |  str CARG2, L->top
+  |  mov CARG1, L
+  |  bl extern lj_gc_step		// (lua_State *L)
+  |   ldr BASE, L->base
+  |  mov lr, RA				// Help return address predictor.
+  |   ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+  |  bx lr
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Special dispatch targets -------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// With JIT: skips recording while HOOK_VMEVENT is set (label 5 below
+  |// re-dispatches to the static instruction) and otherwise always continues
+  |// at label 1 in vm_inshook, which calls lj_dispatch_ins().
+  |// Without JIT the label falls through into vm_rethook.
+  |->vm_record:				// Dispatch target for recording phase.
+  |.if JIT
+  |  ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)]
+  |  tst CARG1, #HOOK_VMEVENT		// No recording while in vmevent.
+  |  bne >5
+  |  // Decrement the hookcount for consistency, but always do the call.
+  |   ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)]
+  |  tst CARG1, #HOOK_ACTIVE
+  |  bne >1
+  |   sub CARG2, CARG2, #1
+  |  // Only store the count back if a line or count hook is set.
+  |  tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
+  |   strne CARG2, [DISPATCH, #DISPATCH_GL(hookcount)]
+  |  b >1
+  |.endif
+  |
+  |// If no hook is active yet, continues at label 1 in vm_inshook, which
+  |// calls lj_dispatch_ins(). Otherwise the instruction is re-dispatched via
+  |// the static dispatch table. Label 5 is also used by vm_record and
+  |// vm_inshook.
+  |->vm_rethook:			// Dispatch target for return hooks.
+  |  ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)]
+  |  tst CARG1, #HOOK_ACTIVE		// Hook already active?
+  |  beq >1
+  |5:  // Re-dispatch to static ins.
+  |  decode_OP OP, INS
+  |  add OP, DISPATCH, OP, lsl #2
+  |  ldr pc, [OP, #GG_DISP2STATIC]
+  |
+  |// Calls lj_dispatch_ins() if no hook is active and either the hook count
+  |// just reached zero or a line hook is set. In all other cases the
+  |// instruction is re-dispatched directly (label 5 in vm_rethook).
+  |// Labels 1, 3 and 4 are also entered from vm_record, vm_rethook,
+  |// vm_hotloop and cont_hook.
+  |->vm_inshook:			// Dispatch target for instr/line hooks.
+  |  ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)]
+  |   ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)]
+  |  tst CARG1, #HOOK_ACTIVE		// Hook already active?
+  |  bne <5
+  |  tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
+  |  beq <5
+  |   subs CARG2, CARG2, #1
+  |   str CARG2, [DISPATCH, #DISPATCH_GL(hookcount)]
+  |   beq >1
+  |  tst CARG1, #LUA_MASKLINE
+  |  beq <5
+  |1:
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  mov CARG2, PC
+  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+  |  bl extern lj_dispatch_ins		// (lua_State *L, const BCIns *pc)
+  |3:
+  |  ldr BASE, L->base
+  |4:  // Re-dispatch to static ins.
+  |  // Refetch and decode the instruction at PC-4.
+  |  ldrb OP, [PC, #-4]
+  |   ldr INS, [PC, #-4]
+  |  add OP, DISPATCH, OP, lsl #2
+  |  ldr OP, [OP, #GG_DISP2STATIC]
+  |   decode_RA8 RA, INS
+  |   decode_RD RC, INS
+  |  bx OP
+  |
+  |// Reloads the saved MULTRES value from [CARG4, #-24], advances PC by one
+  |// instruction and re-dispatches it via label 4 in vm_inshook.
+  |// NOTE(review): the meaning of CARG4 on entry is set up by the caller,
+  |// which is outside this chunk.
+  |->cont_hook:				// Continue from hook yield.
+  |  ldr CARG1, [CARG4, #-24]
+  |   add PC, PC, #4
+  |  str CARG1, SAVE_MULTRES		// Restore MULTRES for *M ins.
+  |  b <4
+  |
+  |// With JIT: sets L->base, sets L->top to BASE plus the frame size of the
+  |// current function's prototype, calls lj_trace_hot() and then re-dispatches
+  |// the instruction via label 3 in vm_inshook.
+  |// Without JIT the label falls through into vm_callhook.
+  |->vm_hotloop:			// Hot loop counter underflow.
+  |.if JIT
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]  // Same as curr_topL(L).
+  |  // CARG1 = jit_State pointer, computed relative to DISPATCH.
+  |   sub CARG1, DISPATCH, #-GG_DISP2J
+  |   str PC, SAVE_PC
+  |  ldr CARG3, LFUNC:CARG3->field_pc
+  |   mov CARG2, PC
+  |   str L, [DISPATCH, #DISPATCH_J(L)]
+  |  ldrb CARG3, [CARG3, #PC2PROTO(framesize)]
+  |   str BASE, L->base
+  |  add CARG3, BASE, CARG3, lsl #3
+  |  str CARG3, L->top
+  |  bl extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
+  |  b <3
+  |.endif
+  |
+  |// Both entry points call lj_dispatch_call() and then jump to the
+  |// ASMFunction it returns. They differ in the pc argument: vm_callhook
+  |// passes PC unchanged, vm_hotcall (JIT only) passes PC with bit 0 set.
+  |// RA is saved as an offset relative to BASE across the call and rebased
+  |// afterwards.
+  |->vm_callhook:			// Dispatch target for call hooks.
+  |  mov CARG2, PC
+  |.if JIT
+  |  b >1
+  |.endif
+  |
+  |->vm_hotcall:			// Hot call counter underflow.
+  |.if JIT
+  |  orr CARG2, PC, #1
+  |1:
+  |.endif
+  |  add CARG4, BASE, RC
+  |   str PC, SAVE_PC
+  |    mov CARG1, L
+  |   str BASE, L->base
+  |    sub RA, RA, BASE
+  |  str CARG4, L->top
+  |  bl extern lj_dispatch_call		// (lua_State *L, const BCIns *pc)
+  |  // Returns ASMFunction.
+  |  ldr BASE, L->base
+  |   ldr CARG4, L->top
+  |    mov CARG2, #0
+  |  add RA, BASE, RA
+  |  // The argument count is recomputed from L->top.
+  |   sub NARGS8:RC, CARG4, BASE
+  |    str CARG2, SAVE_PC		// Invalidate for subsequent line hook.
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+  |   ldr INS, [PC, #-4]
+  |  bx CRET1
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Trace exit handler -------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Entry point for trace exits (JIT only). Saves the register state on the
+  |// stack in the layout passed to lj_trace_exit() as ExitState, computes
+  |// the exit number from the address of the exit stub, calls
+  |// lj_trace_exit() and then continues at label 1 in vm_exit_interp with
+  |// its result in CARG1.
+  |// On entry lr points to data following the exit stubs ([lr] = DISPATCH,
+  |// [lr, #4] = exit stub group offset) and [sp] holds the original lr.
+  |->vm_exit_handler:
+  |.if JIT
+  |  // Reserve the slots for sp, lr and pc, then save r0-r12 below them.
+  |  sub sp, sp, #12
+  |  push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}
+  |  ldr CARG1, [sp, #64]	// Load original value of lr.
+  |   ldr DISPATCH, [lr]	// Load DISPATCH.
+  |    add CARG3, sp, #64	// Recompute original value of sp.
+  |   mv_vmstate CARG4, EXIT
+  |    str CARG3, [sp, #52]	// Store sp in RID_SP
+  |   st_vmstate CARG4
+  |  ldr CARG2, [CARG1, #-4]!	// Get exit instruction.
+  |   str CARG1, [sp, #56]	// Store exit pc in RID_LR and RID_PC.
+  |   str CARG1, [sp, #60]
+  |.if FPU
+  |  vpush {d0-d15}
+  |.endif
+  |  // Sign-extend the low 24 bits of the exit instruction, scaled by 4,
+  |  // and add them to the exit pc.
+  |  lsl CARG2, CARG2, #8
+  |  add CARG1, CARG1, CARG2, asr #6
+  |   ldr CARG2, [lr, #4]	// Load exit stub group offset.
+  |   sub CARG1, CARG1, lr
+  |  ldr L, [DISPATCH, #DISPATCH_GL(jit_L)]
+  |   add CARG1, CARG2, CARG1, lsr #2	// Compute exit number.
+  |    ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
+  |   str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
+  |   mov CARG4, #0
+  |  str L, [DISPATCH, #DISPATCH_J(L)]
+  |    str BASE, L->base
+  |  // Clear jit_L in the global state.
+  |   str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)]
+  |  sub CARG1, DISPATCH, #-GG_DISP2J
+  |  mov CARG2, sp
+  |  bl extern lj_trace_exit		// (jit_State *J, ExitState *ex)
+  |  // Returns MULTRES (unscaled) or negated error code.
+  |  // Reset sp to the C frame of L, which drops the saved exit state.
+  |  ldr CARG2, L->cframe
+  |   ldr BASE, L->base
+  |  bic CARG2, CARG2, #~CFRAME_RAWMASK	// Use two steps: bic sp is deprecated.
+  |  mov sp, CARG2
+  |   ldr PC, SAVE_PC			// Get SAVE_PC.
+  |  str L, SAVE_L			// Set SAVE_L (on-trace resume/yield).
+  |  b >1
+  |.endif
+  |// Returns to the interpreter after a trace exit (JIT only). A negative
+  |// CARG1 is rethrown as an error. Otherwise MULTRES, KBASE and MASKR8 are
+  |// set up again, the VM state is set to INTERP and the instruction at PC
+  |// is dispatched.
+  |->vm_exit_interp:
+  |  // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set.
+  |.if JIT
+  |  ldr L, SAVE_L
+  |1:
+  |  cmp CARG1, #0
+  |  blt >3				// Check for error from exit.
+  |   lsl RC, CARG1, #3
+  |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+  |   str RC, SAVE_MULTRES
+  |   mov CARG3, #0
+  |  ldr CARG2, LFUNC:CARG2->field_pc
+  |   str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)]
+  |    mv_vmstate CARG4, INTERP
+  |  ldr KBASE, [CARG2, #PC2PROTO(k)]
+  |  // Modified copy of ins_next which handles function header dispatch, too.
+  |  ldrb OP, [PC]
+  |     mov MASKR8, #255
+  |   ldr INS, [PC], #4
+  |     lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
+  |    st_vmstate CARG4
+  |  cmp OP, #BC_FUNCF			// Function header?
+  |  ldr OP, [DISPATCH, OP, lsl #2]
+  |   decode_RA8 RA, INS
+  |   lsrlo RC, INS, #16	// No: Decode operands A*8 and D.
+  |   subhs RC, RC, #8
+  |   addhs RA, RA, BASE	// Yes: RA = BASE+framesize*8, RC = nargs*8
+  |  bx OP
+  |
+  |3:  // Rethrow error from the right C frame.
+  |  // Negate CARG1 to get the positive error code back.
+  |  rsb CARG2, CARG1, #0
+  |  mov CARG1, L
+  |  bl extern lj_err_throw		// (lua_State *L, int errcode)
+  |.endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Math helper functions ----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// FP value rounding. Called from JIT code.
+  |// vm_round: func is "floor" or anything else for ceil; hf == 1 passes x in d0, else in CARG1 (lo) and CARG2 (hi).
+  |// double lj_vm_floor/ceil/trunc(double x);
+  |.macro vm_round, func, hf
+  |.if hf == 1
+  |  vmov CARG1, CARG2, d0
+  |.endif
+  |  lsl CARG3, CARG2, #1	// CARG3 = 2*hi, the sign bit is shifted out.
+  |  adds RB, CARG3, #0x00200000	// Exponent+1: N clear if biased exp < 0x3ff, C set if exp was 0x7ff.
+  |  bpl >2				// |x| < 1?
+  |  mvn CARG4, #0x3cc
+  |  subs RB, CARG4, RB, asr #21	// 2^0: RB = 51, 2^51: RB = 0.
+  |  bxlo lr				// |x| >= 2^52: done.
+  |  mvn CARG4, #1	// CARG4 = ~1, shifted left to form the fraction masks.
+  |   bic CARG3, CARG1, CARG4, lsl RB	// ztest = lo & ~lomask
+  |  and CARG1, CARG1, CARG4, lsl RB	// lo &= lomask
+  |  subs RB, RB, #32	// RB >= 0 from here on: fraction bits reach into hi.
+  |   bicpl CARG4, CARG2, CARG4, lsl RB	// |x| <= 2^20: ztest |= hi & ~himask
+  |   orrpl CARG3, CARG3, CARG4
+  |   mvnpl CARG4, #1
+  |  andpl CARG2, CARG2, CARG4, lsl RB	// |x| <= 2^20: hi &= himask
+  |.if "func" == "floor"
+  |   tst CARG3, CARG2, asr #31		// iszero = ((ztest & signmask) == 0)
+  |.else
+  |   bics CARG3, CARG3, CARG2, asr #31	// iszero = ((ztest & ~signmask) == 0)
+  |.endif
+  |.if hf == 1
+  |  vmoveq d0, CARG1, CARG2
+  |.endif
+  |  bxeq lr				// iszero: done.
+  |  mvn CARG4, #1	// Otherwise step the truncated value one unit away from zero.
+  |  cmp RB, #0
+  |  lslpl CARG3, CARG4, RB	// himask = ~1 << RB if the fraction reached into hi,
+  |  mvnmi CARG3, #0	// else himask = ~0.
+  |  add RB, RB, #32	// Back to the shift count used for lo.
+  |  subs CARG1, CARG1, CARG4, lsl RB	// lo = lo-lomask
+  |  sbc CARG2, CARG2, CARG3		// hi = hi-himask+carry
+  |.if hf == 1
+  |  vmov d0, CARG1, CARG2
+  |.endif
+  |  bx lr
+  |
+  |2:  // |x| < 1:
+  |  bxcs lr				// |x| is not finite.
+  |  orr CARG3, CARG3, CARG1		// ztest = (2*hi) | lo
+  |.if "func" == "floor"
+  |  tst CARG3, CARG2, asr #31		// iszero = ((ztest & signmask) == 0)
+  |.else
+  |  bics CARG3, CARG3, CARG2, asr #31	// iszero = ((ztest & ~signmask) == 0)
+  |.endif
+  |  mov CARG1, #0			// lo = 0
+  |  and CARG2, CARG2, #0x80000000	// Keep only the sign bit; mov/and leave the flags alone.
+  |  ldrne CARG4, <9			// hi = sign(x) | (iszero ? 0.0 : 1.0)
+  |  orrne CARG2, CARG2, CARG4
+  |.if hf == 1
+  |  vmov d0, CARG1, CARG2
+  |.endif
+  |  bx lr
+  |.endmacro
+  |
+  |9:	// Literal loaded by the "ldrne CARG4, <9" in every vm_round expansion below.
+  |  .long 0x3ff00000			// hiword(+1.0)
+  |
+  |->vm_floor:	// With HFABI: x and result in d0. Otherwise same code as vm_floor_sf.
+  |.if HFABI
+  |  vm_round floor, 1
+  |.endif
+  |->vm_floor_sf:	// x and result in CARG1 (lo), CARG2 (hi).
+  |  vm_round floor, 0
+  |
+  |->vm_ceil:	// With HFABI: x and result in d0. Otherwise same code as vm_ceil_sf.
+  |.if HFABI
+  |  vm_round ceil, 1
+  |.endif
+  |->vm_ceil_sf:	// x and result in CARG1 (lo), CARG2 (hi).
+  |  vm_round ceil, 0
+  |
+  |.macro vm_trunc, hf	// Round towards zero; body is only emitted with JIT. hf == 1: x in d0.
+  |.if JIT
+  |.if hf == 1
+  |  vmov CARG1, CARG2, d0
+  |.endif
+  |  lsl CARG3, CARG2, #1	// CARG3 = 2*hi, the sign bit is shifted out.
+  |  adds RB, CARG3, #0x00200000	// Exponent+1: N clear if biased exp < 0x3ff.
+  |  andpl CARG2, CARG2, #0x80000000	// |x| < 1? hi = sign(x), lo = 0.
+  |  movpl CARG1, #0
+  |.if hf == 1
+  |  vmovpl d0, CARG1, CARG2
+  |.endif
+  |  bxpl lr	// NOTE(review): exp 0x7ff (inf/NaN) also leaves N clear and returns +-0 here; vm_round tests carry for this case -- confirm callers.
+  |  mvn CARG4, #0x3cc
+  |  subs RB, CARG4, RB, asr #21	// 2^0: RB = 51, 2^51: RB = 0.
+  |  bxlo lr				// |x| >= 2^52: already done.
+  |  mvn CARG4, #1	// CARG4 = ~1, shifted left to form the fraction masks.
+  |  and CARG1, CARG1, CARG4, lsl RB	// lo &= lomask
+  |  subs RB, RB, #32	// RB >= 0 from here on: fraction bits reach into hi.
+  |  andpl CARG2, CARG2, CARG4, lsl RB	// |x| <= 2^20: hi &= himask
+  |.if hf == 1
+  |  vmov d0, CARG1, CARG2
+  |.endif
+  |  bx lr
+  |.endif
+  |.endmacro
+  |
+  |->vm_trunc:	// With HFABI: x and result in d0. Otherwise same code as vm_trunc_sf.
+  |.if HFABI
+  |  vm_trunc 1
+  |.endif
+  |->vm_trunc_sf:	// x and result in CARG1 (lo), CARG2 (hi).
+  |  vm_trunc 0
+  |
+  |  // double lj_vm_mod(double dividend, double divisor);
+  |->vm_mod:	// Computes dividend - floor(dividend/divisor)*divisor.
+  |.if FPU
+  |  // Special calling convention. Also, RC (r11) is not preserved.
+  |  vdiv.f64 d0, d6, d7	// d0 = dividend (d6) / divisor (d7).
+  |   mov RC, lr	// Keep the return address across the call.
+  |  vmov CARG1, CARG2, d0
+  |  bl ->vm_floor_sf	// CARG1/CARG2 = floor(quotient).
+  |  vmov d0, CARG1, CARG2
+  |  vmul.f64 d0, d0, d7	// d0 = floor(quotient) * divisor.
+  |   mov lr, RC
+  |  vsub.f64 d6, d6, d0	// Result is left in d6.
+  |  bx lr
+  |.else
+  |  push {r0, r1, r2, r3, r4, lr}	// Dividend at [sp], divisor at [sp, #8].
+  |  bl extern __aeabi_ddiv
+  |  bl ->vm_floor_sf	// CARG1/CARG2 = floor(quotient).
+  |  ldrd CARG34, [sp, #8]	// Reload the divisor.
+  |  bl extern __aeabi_dmul
+  |  ldrd CARG34, [sp]	// Reload the dividend.
+  |  eor CARG2, CARG2, #0x80000000	// Negate the product.
+  |  bl extern __aeabi_dadd	// Result = dividend + (-product).
+  |  add sp, sp, #20	// Drop the five saved words r0-r4 (r4 is not reloaded).
+  |  pop {pc}	// Return through the saved lr.
+  |.endif
+  |
+  |  // int lj_vm_modi(int dividend, int divisor);
+  |->vm_modi:	// Result sign follows the divisor. Callers in this file reject divisor == 0 first.
+  |  ands RB, CARG1, #0x80000000	// RB = sign bit of the dividend.
+  |  rsbmi CARG1, CARG1, #0		// a = |dividend|
+  |  eor RB, RB, CARG2, asr #1		// Keep signdiff and sign(divisor).
+  |  cmp CARG2, #0
+  |  rsbmi CARG2, CARG2, #0		// b = |divisor|
+  |  subs CARG4, CARG2, #1	// CARG4 = b-1.
+  |  cmpne CARG1, CARG2
+  |  moveq CARG1, #0			// if (b == 1 || a == b) a = 0
+  |  tsthi CARG2, CARG4	// Only if a > b: is b a power of two?
+  |  andeq CARG1, CARG1, CARG4		// else if ((b & (b-1)) == 0) a &= b-1
+  |  bls >1	// a already holds the remainder in all of these cases.
+  |  // Use repeated subtraction to get the remainder.
+  |  clz CARG3, CARG1
+  |  clz CARG4, CARG2
+  |  sub CARG4, CARG4, CARG3
+  |  rsbs CARG3, CARG4, #31		// entry = (31-(clz(b)-clz(a)))*8
+  |  addne pc, pc, CARG3, lsl #3	// Duff's device.
+  |  nop	// Filler: pc reads as the address two instructions past the addne.
+  {
+    int i;
+    for (i = 31; i >= 0; i--) {  /* Emits 32 compare/subtract pairs, largest shift first. */
+      |  cmp CARG1, CARG2, lsl #i
+      |  subhs CARG1, CARG1, CARG2, lsl #i
+    }
+  }
+  |1:
+  |  cmp CARG1, #0
+  |  cmpne RB, #0	// Bit 31 of RB is signdiff.
+  |  submi CARG1, CARG1, CARG2		// if (y != 0 && signdiff) y = y - b
+  |  eors CARG2, CARG1, RB, lsl #1	// Bit 31 = sign(y) ^ sign(divisor).
+  |  rsbmi CARG1, CARG1, #0		// if (sign(divisor) != sign(y)) y = -y
+  |  bx lr
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Miscellaneous functions --------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-----------------------------------------------------------------------
+  |//-- FFI helper functions -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Handler for callback functions.
+  |// Saveregs already performed. Callback slot number in [sp], g in r12.
+  |->vm_ffi_callback:
+  |.if FFI
+  |.type CTSTATE, CTState, PC	// CTSTATE lives in the PC register here.
+  |  ldr CTSTATE, GL:r12->ctype_state
+  |   add DISPATCH, r12, #GG_G2DISP	// Derive DISPATCH from g.
+  |.if FPU
+  |  str r4, SAVE_R4
+  |  add r4, sp, CFRAME_SPACE+4+8*8
+  |  vstmdb r4!, {d8-d15}	// Store d8-d15 into the 64 bytes below that address.
+  |.endif
+  |.if HFABI
+  |  add r12, CTSTATE, #offsetof(CTState, cb.fpr[8])
+  |.endif
+  |  strd CARG34, CTSTATE->cb.gpr[2]	// Save the incoming argument registers
+  |  strd CARG12, CTSTATE->cb.gpr[0]	// CARG1-CARG4 to cb.gpr[0..3].
+  |.if HFABI
+  |  vstmdb r12!, {d0-d7}	// Save d0-d7 into the 64 bytes below cb.fpr[8].
+  |.endif
+  |  ldr CARG4, [sp]	// Value left at [sp] by the callback entry code.
+  |   add CARG3, sp, #CFRAME_SIZE
+  |    mov CARG1, CTSTATE
+  |  lsr CARG4, CARG4, #3	// cb.slot = [sp] >> 3.
+  |   str CARG3, CTSTATE->cb.stack	// cb.stack = sp + CFRAME_SIZE.
+  |    mov CARG2, sp
+  |  str CARG4, CTSTATE->cb.slot
+  |  str CTSTATE, SAVE_PC		// Any value outside of bytecode is ok.
+  |  bl extern lj_ccallback_enter	// (CTState *cts, void *cf)
+  |  // Returns lua_State *.
+  |  ldr BASE, L:CRET1->base
+  |    mv_vmstate CARG2, INTERP
+  |  ldr RC, L:CRET1->top
+  |    mov MASKR8, #255
+  |   ldr LFUNC:CARG3, [BASE, FRAME_FUNC]	// CARG3 = function of the new frame.
+  |    mov L, CRET1
+  |  sub RC, RC, BASE	// RC = top - base.
+  |    lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
+  |    st_vmstate CARG2
+  |  ins_callt	// Macro defined elsewhere; presumably dispatches the call to CARG3.
+  |.endif
+  |
+  |->cont_ffi_callback:			// Return from FFI callback.
+  |.if FFI
+  |  ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)]
+  |   str BASE, L->base
+  |   str CARG4, L->top	// CARG4 is set up by the code that enters this continuation.
+  |  str L, CTSTATE->L
+  |  mov CARG1, CTSTATE
+  |  mov CARG2, RA	// o = RA.
+  |  bl extern lj_ccallback_leave	// (CTState *cts, TValue *o)
+  |  ldrd CARG12, CTSTATE->cb.gpr[0]	// Load the C return registers from cb.gpr[0..1].
+  |.if HFABI
+  |  vldr d0, CTSTATE->cb.fpr[0]	// Load d0 from cb.fpr[0] as well.
+  |.endif
+  |  b ->vm_leave_unw	// Label defined elsewhere; presumably unwinds the C frame and returns.
+  |.endif
+  |
+  |->vm_ffi_call:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
+  |.if FFI
+  |  .type CCSTATE, CCallState, r4
+  |  push {CCSTATE, r5, r11, lr}
+  |  mov CCSTATE, CARG1	// Keep the CCallState pointer in r4 across the call.
+  |  ldr CARG1, CCSTATE:CARG1->spadj
+  |   ldrb CARG2, CCSTATE->nsp
+  |    add CARG3, CCSTATE, #offsetof(CCallState, stack)
+  |.if HFABI
+  |  add RB, CCSTATE, #offsetof(CCallState, fpr[0])
+  |.endif
+  |  mov r11, sp	// r11 = sp to restore after the call.
+  |  sub sp, sp, CARG1			// Readjust stack.
+  |   subs CARG2, CARG2, #1	// CARG2 = nsp-1 = index of the last stack slot.
+  |.if HFABI
+  |  vldm RB, {d0-d7}	// Load d0-d7 from fpr[]; does not touch the flags.
+  |.endif
+  |    ldr RB, CCSTATE->func	// RB = function to call.
+  |   bmi >2	// nsp == 0: nothing to copy.
+  |1:  // Copy stack slots.
+  |  ldr CARG4, [CARG3, CARG2, lsl #2]
+  |  str CARG4, [sp, CARG2, lsl #2]
+  |  subs CARG2, CARG2, #1
+  |  bpl <1	// Continue down to slot 0.
+  |2:
+  |  ldrd CARG12, CCSTATE->gpr[0]	// Load the argument registers CARG1-CARG4
+  |  ldrd CARG34, CCSTATE->gpr[2]	// from gpr[0..3].
+  |  blx RB
+  |  mov sp, r11	// Undo the stack adjustment.
+  |.if HFABI
+  |  add r12, CCSTATE, #offsetof(CCallState, fpr[4])
+  |.endif
+  |  strd CRET1, CCSTATE->gpr[0]	// Store the return register pair to gpr[0..1].
+  |.if HFABI
+  |  vstmdb r12!, {d0-d3}	// Store d0-d3 into the 32 bytes below fpr[4].
+  |.endif
+  |  pop {CCSTATE, r5, r11, pc}
+  |.endif
+  |// Note: vm_ffi_call must be the last function in this object file!
+  |
+  |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+  int vk = 0;
+  |=>defop:
+
+  switch (op) {
+
+  /* -- Comparison ops ---------------------------------------------------- */
+
+  /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+    |  // RA = src1*8, RC = src2, JMP with RC = target
+    |   lsl RC, RC, #3	// RC = src2*8.
+    |  ldrd CARG12, [RA, BASE]!	// Load src1; writeback leaves RA = &src1.
+    |    ldrh RB, [PC, #2]	// RB = upper halfword of the following JMP.
+    |   ldrd CARG34, [RC, BASE]!	// Load src2; writeback leaves RC = &src2.
+    |    add PC, PC, #4	// Skip the JMP.
+    |    add RB, PC, RB, lsl #2	// RB-0x20000 is the branch target (0x20000 = 0x8000*4, presumably the jump bias).
+    |  checktp CARG2, LJ_TISNUM
+    |  bne >3
+    |  checktp CARG4, LJ_TISNUM
+    |  bne >4
+    |  cmp CARG1, CARG3	// Both are integers: signed compare.
+    if (op == BC_ISLT) {
+      |  sublt PC, RB, #0x20000
+    } else if (op == BC_ISGE) {
+      |  subge PC, RB, #0x20000
+    } else if (op == BC_ISLE) {
+      |  suble PC, RB, #0x20000
+    } else {
+      |  subgt PC, RB, #0x20000
+    }
+    |1:
+    |  ins_next
+    |
+    |3: // CARG12 is not an integer.
+    |.if FPU
+    |   vldr d0, [RA]	// Does not disturb the flags of the checktp above.
+    |  bhi ->vmeta_comp
+    |  // d0 is a number.
+    |  checktp CARG4, LJ_TISNUM
+    |   vldr d1, [RC]
+    |  blo >5	// src2 is a number, too.
+    |  bhi ->vmeta_comp
+    |  // d0 is a number, CARG3 is an integer.
+    |  vmov s4, CARG3
+    |  vcvt.f64.s32 d1, s4
+    |  b >5
+    |4:  // CARG1 is an integer, CARG34 is not an integer.
+    |   vldr d1, [RC]
+    |  bhi ->vmeta_comp
+    |  // CARG1 is an integer, d1 is a number.
+    |  vmov s4, CARG1
+    |  vcvt.f64.s32 d0, s4
+    |5:  // d0 and d1 are numbers.
+    |  vcmp.f64 d0, d1
+    |  vmrs	// Transfer the FP compare result to the CPU flags.
+    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+    if (op == BC_ISLT) {
+      |  sublo PC, RB, #0x20000
+    } else if (op == BC_ISGE) {
+      |  subhs PC, RB, #0x20000
+    } else if (op == BC_ISLE) {
+      |  subls PC, RB, #0x20000
+    } else {
+      |  subhi PC, RB, #0x20000
+    }
+    |  b <1
+    |.else
+    |  bhi ->vmeta_comp
+    |  // CARG12 is a number.
+    |  checktp CARG4, LJ_TISNUM
+    |  movlo RA, RB			// Save RB.
+    |  blo >5
+    |  bhi ->vmeta_comp
+    |  // CARG12 is a number, CARG3 is an integer.
+    |  mov CARG1, CARG3
+    |  mov RC, RA	// RC = &src1, kept across the call.
+    |  mov RA, RB			// Save RB.
+    |  bl extern __aeabi_i2d
+    |  mov CARG3, CARG1	// Converted src2 becomes the second operand.
+    |  mov CARG4, CARG2
+    |  ldrd CARG12, [RC]		// Restore first operand.
+    |  b >5
+    |4:  // CARG1 is an integer, CARG34 is not an integer.
+    |  bhi ->vmeta_comp
+    |  // CARG1 is an integer, CARG34 is a number.
+    |  mov RA, RB			// Save RB.
+    |  bl extern __aeabi_i2d
+    |  ldrd CARG34, [RC]		// Restore second operand.
+    |5:  // CARG12 and CARG34 are numbers.
+    |  bl extern __aeabi_cdcmple	// Result is used through the flags below.
+    |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+    if (op == BC_ISLT) {
+      |  sublo PC, RA, #0x20000
+    } else if (op == BC_ISGE) {
+      |  subhs PC, RA, #0x20000
+    } else if (op == BC_ISLE) {
+      |  subls PC, RA, #0x20000
+    } else {
+      |  subhi PC, RA, #0x20000
+    }
+    |  b <1
+    |.endif
+    break;
+
+  case BC_ISEQV: case BC_ISNEV:
+    vk = op == BC_ISEQV;  /* vk = 1 for ISEQV, 0 for ISNEV. */
+    |  // RA = src1*8, RC = src2, JMP with RC = target
+    |   lsl RC, RC, #3	// RC = src2*8.
+    |  ldrd CARG12, [RA, BASE]!	// Load src1; writeback leaves RA = &src1.
+    |    ldrh RB, [PC, #2]	// RB = upper halfword of the following JMP.
+    |   ldrd CARG34, [RC, BASE]!	// Load src2; writeback leaves RC = &src2.
+    |    add PC, PC, #4	// Skip the JMP.
+    |    add RB, PC, RB, lsl #2	// RB-0x20000 is the branch target.
+    |  checktp CARG2, LJ_TISNUM
+    |  cmnls CARG4, #-LJ_TISNUM	// ls afterwards only if both operands are numbers.
+    if (vk) {
+      |  bls ->BC_ISEQN_Z	// Continue in the shared number comparison of BC_ISEQN.
+    } else {
+      |  bls ->BC_ISNEN_Z	// Continue in the shared number comparison of BC_ISNEN.
+    }
+    |  // Either or both types are not numbers.
+    |.if FFI
+    |  checktp CARG2, LJ_TCDATA
+    |  checktpne CARG4, LJ_TCDATA
+    |  beq ->vmeta_equal_cd	// At least one operand is cdata.
+    |.endif
+    |  cmp CARG2, CARG4			// Compare types.
+    |  bne >2				// Not the same type?
+    |  checktp CARG2, LJ_TISPRI
+    |  bhs >1				// Same type and primitive type?
+    |
+    |  // Same types and not a primitive type. Compare GCobj or pvalue.
+    |  cmp CARG1, CARG3
+    if (vk) {
+      |  bne >3				// Different GCobjs or pvalues?
+      |1:  // Branch if same.
+      |  sub PC, RB, #0x20000
+      |2:  // Different.
+      |  ins_next
+      |3:
+      |  checktp CARG2, LJ_TISTABUD
+      |  bhi <2				// Different objects and not table/ud?
+    } else {
+      |  beq >1				// Same GCobjs or pvalues?
+      |  checktp CARG2, LJ_TISTABUD
+      |  bhi >2				// Different objects and not table/ud?
+    }
+    |  // Different tables or userdatas. Need to check __eq metamethod.
+    |  // Field metatable must be at same offset for GCtab and GCudata!
+    |  ldr TAB:RA, TAB:CARG1->metatable
+    |  cmp TAB:RA, #0
+    if (vk) {
+      |  beq <2			// No metatable?
+    } else {
+      |  beq >2			// No metatable?
+    }
+    |  ldrb RA, TAB:RA->nomm
+    |   mov CARG4, #1-vk		// ne = 0 or 1.
+    |   mov CARG2, CARG1
+    |  tst RA, #1<<MM_eq
+    |  beq ->vmeta_equal		// 'no __eq' flag not set?
+    if (vk) {
+      |  b <2	// No __eq: the objects are different.
+    } else {
+      |2:  // Branch if different.
+      |  sub PC, RB, #0x20000
+      |1:  // Same.
+      |  ins_next
+    }
+    break;
+
+  case BC_ISEQS: case BC_ISNES:
+    vk = op == BC_ISEQS;  /* vk = 1 for ISEQS, 0 for ISNES. */
+    |  // RA = src*8, RC = str_const (~), JMP with RC = target
+    |   mvn RC, RC	// RC = ~RC, a negative index relative to KBASE.
+    |  ldrd CARG12, [BASE, RA]
+    |    ldrh RB, [PC, #2]	// RB = upper halfword of the following JMP.
+    |   ldr STR:CARG3, [KBASE, RC, lsl #2]	// CARG3 = string constant.
+    |    add PC, PC, #4	// Skip the JMP.
+    |    add RB, PC, RB, lsl #2	// RB-0x20000 is the branch target.
+    |  checktp CARG2, LJ_TSTR
+    |.if FFI
+    |  bne >7
+    |  cmp CARG1, CARG3
+    |.else
+    |  cmpeq CARG1, CARG3	// eq afterwards only if src is this very string object.
+    |.endif
+    if (vk) {
+      |  subeq PC, RB, #0x20000
+      |1:
+    } else {
+      |1:
+      |  subne PC, RB, #0x20000
+    }
+    |  ins_next
+    |
+    |.if FFI
+    |7:
+    |  checktp CARG2, LJ_TCDATA
+    |  bne <1	// Not cdata: treat as not equal (flags are ne here).
+    |  b ->vmeta_equal_cd
+    |.endif
+    break;
+
+  case BC_ISEQN: case BC_ISNEN:
+    vk = op == BC_ISEQN;  /* vk = 1 for ISEQN, 0 for ISNEN. */
+    |  // RA = src*8, RC = num_const (~), JMP with RC = target
+    |   lsl RC, RC, #3	// RC = constant index*8.
+    |  ldrd CARG12, [RA, BASE]!	// Load src; writeback leaves RA = &src.
+    |    ldrh RB, [PC, #2]	// RB = upper halfword of the following JMP.
+    |   ldrd CARG34, [RC, KBASE]!	// Load the constant; writeback leaves RC = &constant.
+    |    add PC, PC, #4	// Skip the JMP.
+    |    add RB, PC, RB, lsl #2	// RB-0x20000 is the branch target.
+    if (vk) {
+      |->BC_ISEQN_Z:	// Also entered from BC_ISEQV when both operands are numbers.
+    } else {
+      |->BC_ISNEN_Z:	// Also entered from BC_ISNEV when both operands are numbers.
+    }
+    |  checktp CARG2, LJ_TISNUM
+    |  bne >3
+    |  checktp CARG4, LJ_TISNUM
+    |  bne >4
+    |  cmp CARG1, CARG3	// Both are integers.
+    if (vk) {
+      |  subeq PC, RB, #0x20000
+      |1:
+    } else {
+      |1:
+      |  subne PC, RB, #0x20000
+    }
+    |2:
+    |  ins_next
+    |
+    |3:  // CARG12 is not an integer.
+    |.if FFI
+    |  bhi >7
+    |.else
+    if (!vk) {
+      |  subhi PC, RB, #0x20000
+    }
+    |  bhi <2	// Not a number at all: never equal.
+    |.endif
+    |.if FPU
+    |  checktp CARG4, LJ_TISNUM
+    |  vmov s4, CARG3
+    |   vldr d0, [RA]
+    |  vldrlo d1, [RC]	// Second operand is a number: load it as is.
+    |  vcvths.f64.s32 d1, s4	// Otherwise convert the integer in CARG3.
+    |  b >5
+    |4:  // CARG1 is an integer, d1 is a number.
+    |  vmov s4, CARG1
+    |   vldr d1, [RC]
+    |  vcvt.f64.s32 d0, s4
+    |5:  // d0 and d1 are numbers.
+    |  vcmp.f64 d0, d1
+    |  vmrs	// Transfer the FP compare result to the CPU flags.
+    if (vk) {
+      |  subeq PC, RB, #0x20000
+    } else {
+      |  subne PC, RB, #0x20000
+    }
+    |  b <2
+    |.else
+    |  // CARG12 is a number.
+    |  checktp CARG4, LJ_TISNUM
+    |  movlo RA, RB			// Save RB.
+    |  blo >5
+    |  // CARG12 is a number, CARG3 is an integer.
+    |  mov CARG1, CARG3
+    |  mov RC, RA	// RC = &src, reloaded as the other operand after the conversion.
+    |4:  // CARG1 is an integer, CARG34 is a number.
+    |  mov RA, RB			// Save RB.
+    |  bl extern __aeabi_i2d
+    |  ldrd CARG34, [RC]		// Restore other operand.
+    |5:  // CARG12 and CARG34 are numbers.
+    |  bl extern __aeabi_cdcmpeq	// Result is used through the flags below.
+    if (vk) {
+      |  subeq PC, RA, #0x20000
+    } else {
+      |  subne PC, RA, #0x20000
+    }
+    |  b <2
+    |.endif
+    |
+    |.if FFI
+    |7:
+    |  checktp CARG2, LJ_TCDATA
+    |  bne <1	// Not cdata: treat as not equal (flags are ne here).
+    |  b ->vmeta_equal_cd
+    |.endif
+    break;
+
+  case BC_ISEQP: case BC_ISNEP:
+    vk = op == BC_ISEQP;  /* vk = 1 for ISEQP, 0 for ISNEP. */
+    |  // RA = src*8, RC = primitive_type (~), JMP with RC = target
+    |  ldrd CARG12, [BASE, RA]
+    |   ldrh RB, [PC, #2]	// RB = upper halfword of the following JMP.
+    |   add PC, PC, #4	// Skip the JMP.
+    |  mvn RC, RC	// RC = ~RC, compared against the type word of src below.
+    |   add RB, PC, RB, lsl #2	// RB-0x20000 is the branch target.
+    |.if FFI
+    |  checktp CARG2, LJ_TCDATA
+    |  beq ->vmeta_equal_cd
+    |.endif
+    |  cmp CARG2, RC
+    if (vk) {
+      |  subeq PC, RB, #0x20000
+    } else {
+      |  subne PC, RB, #0x20000
+    }
+    |  ins_next
+    break;
+
+  /* -- Unary test and copy ops ------------------------------------------- */
+
+  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+    |  // RA = dst*8 or unused, RC = src, JMP with RC = target
+    |  add RC, BASE, RC, lsl #3	// RC = &src.
+    |   ldrh RB, [PC, #2]	// RB = upper halfword of the following JMP.
+    |  ldrd CARG12, [RC]
+    |   add PC, PC, #4	// Skip the JMP.
+    |   add RB, PC, RB, lsl #2	// RB-0x20000 is the branch target.
+    |  checktp CARG2, LJ_TTRUE	// ls selects the IST/ISTC branch, hi the ISF/ISFC branch.
+    if (op == BC_ISTC || op == BC_IST) {
+      |  subls PC, RB, #0x20000
+      if (op == BC_ISTC) {
+	|  strdls CARG12, [BASE, RA]	// Copy src to dst only when branching.
+      }
+    } else {
+      |  subhi PC, RB, #0x20000
+      if (op == BC_ISFC) {
+	|  strdhi CARG12, [BASE, RA]	// Copy src to dst only when branching.
+      }
+    }
+    |  ins_next
+    break;
+
+  /* -- Unary ops --------------------------------------------------------- */
+
+  case BC_MOV:
+    |  // RA = dst*8, RC = src
+    |  lsl RC, RC, #3	// RC = src*8.
+    |   ins_next1
+    |  ldrd CARG12, [BASE, RC]	// Copy both words of the slot.
+    |   ins_next2
+    |  strd CARG12, [BASE, RA]
+    |   ins_next3
+    break;
+  case BC_NOT:
+    |  // RA = dst*8, RC = src
+    |  add RC, BASE, RC, lsl #3	// RC = &src.
+    |   ins_next1
+    |  ldr CARG1, [RC, #4]	// Only the type word at offset 4 is needed.
+    |   add RA, BASE, RA	// RA = &dst.
+    |   ins_next2
+    |  checktp CARG1, LJ_TTRUE
+    |  mvnls CARG2, #~LJ_TFALSE	// ls: result type is LJ_TFALSE.
+    |  mvnhi CARG2, #~LJ_TTRUE	// hi: result type is LJ_TTRUE.
+    |  str CARG2, [RA, #4]	// Only the type word of dst is written.
+    |   ins_next3
+    break;
+  case BC_UNM:
+    |  // RA = dst*8, RC = src
+    |  lsl RC, RC, #3	// RC = src*8.
+    |  ldrd CARG12, [BASE, RC]
+    |   ins_next1
+    |   ins_next2
+    |  checktp CARG2, LJ_TISNUM
+    |  bhi ->vmeta_unm	// Not a number: use the metamethod path.
+    |  eorne CARG2, CARG2, #0x80000000	// Number: flip the sign bit of the high word.
+    |  bne >5
+    |  rsbseq CARG1, CARG1, #0	// Integer: negate, V is set on overflow.
+    |  ldrdvs CARG12, >9	// Overflow: result is the number 2^31 stored below.
+    |5:
+    |  strd CARG12, [BASE, RA]
+    |   ins_next3
+    |
+    |.align 8
+    |9:
+    |  .long 0x00000000, 0x41e00000	// 2^31.
+    break;
+  case BC_LEN:
+    |  // RA = dst*8, RC = src
+    |  lsl RC, RC, #3	// RC = src*8.
+    |  ldrd CARG12, [BASE, RC]
+    |  checkstr CARG2, >2	// Macro defined elsewhere; presumably branches to 2 unless src is a string.
+    |  ldr CARG1, STR:CARG1->len
+    |1:
+    |  mvn CARG2, #~LJ_TISNUM	// Tag the length in CARG1 with LJ_TISNUM.
+    |   ins_next1
+    |   ins_next2
+    |  strd CARG12, [BASE, RA]
+    |   ins_next3
+    |2:
+    |  checktab CARG2, ->vmeta_len	// Macro defined elsewhere; presumably branches unless src is a table.
+#if LJ_52
+    |  ldr TAB:CARG3, TAB:CARG1->metatable
+    |  cmp TAB:CARG3, #0
+    |  bne >9	// Has a metatable: check for __len at 9.
+    |3:
+#endif
+    |->BC_LEN_Z:
+    |  .IOS mov RC, BASE
+    |  bl extern lj_tab_len		// (GCtab *t)
+    |  // Returns uint32_t (but less than 2^31).
+    |  .IOS mov BASE, RC
+    |  b <1	// Store the length like the string case.
+#if LJ_52
+    |9:
+    |  ldrb CARG4, TAB:CARG3->nomm
+    |  tst CARG4, #1<<MM_len
+    |  bne <3				// 'no __len' flag set: done.
+    |  b ->vmeta_len
+#endif
+    break;
+
+  /* -- Binary ops -------------------------------------------------------- */
+
+    |.macro ins_arithcheck, cond, ncond, target	// Branch to target with ncond unless both type words pass the cond check.
+    ||if (vk == 1) {
+    |   cmn CARG4, #-LJ_TISNUM
+    |    cmn..cond CARG2, #-LJ_TISNUM
+    ||} else {
+    |   cmn CARG2, #-LJ_TISNUM
+    |    cmn..cond CARG4, #-LJ_TISNUM
+    ||}
+    |  b..ncond target
+    |.endmacro
+    |.macro ins_arithcheck_int, target	// Branch to target unless both type words equal LJ_TISNUM.
+    |  ins_arithcheck eq, ne, target
+    |.endmacro
+    |.macro ins_arithcheck_num, target	// Branch to target unless both type words are below LJ_TISNUM.
+    |  ins_arithcheck lo, hs, target
+    |.endmacro
+    |
+    |.macro ins_arithpre	// Decode B and C, derive vk from the opcode and load both operands.
+    |  decode_RB8 RB, INS
+    |   decode_RC8 RC, INS
+    |  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+    ||switch (vk) {
+    ||case 0:
+    |   .if FPU
+    |   ldrd CARG12, [RB, BASE]!	// vk = 0: CARG12 = slot from BASE, CARG34 = constant from KBASE.
+    |    ldrd CARG34, [RC, KBASE]!	// With FPU the writeback leaves RB/RC as operand addresses.
+    |   .else
+    |   ldrd CARG12, [BASE, RB]
+    |    ldrd CARG34, [KBASE, RC]
+    |   .endif
+    ||  break;
+    ||case 1:
+    |   .if FPU
+    |   ldrd CARG34, [RB, BASE]!	// vk = 1: CARG12 = constant from KBASE, CARG34 = slot from BASE.
+    |    ldrd CARG12, [RC, KBASE]!
+    |   .else
+    |   ldrd CARG34, [BASE, RB]
+    |    ldrd CARG12, [KBASE, RC]
+    |   .endif
+    ||  break;
+    ||default:
+    |   .if FPU
+    |   ldrd CARG12, [RB, BASE]!	// Otherwise both operands are slots from BASE.
+    |    ldrd CARG34, [RC, BASE]!
+    |   .else
+    |   ldrd CARG12, [BASE, RB]
+    |    ldrd CARG34, [BASE, RC]
+    |   .endif
+    ||  break;
+    ||}
+    |.endmacro
+    |
+    |.macro ins_arithpre_fpu, reg1, reg2	// FPU only: reload the operands into reg1 (first) and reg2 (second).
+    |.if FPU
+    ||if (vk == 1) {
+    |  vldr reg2, [RB]
+    |  vldr reg1, [RC]
+    ||} else {
+    |  vldr reg1, [RB]
+    |  vldr reg2, [RC]
+    ||}
+    |.endif
+    |.endmacro
+    |
+    |.macro ins_arithpost_fpu, reg	// Store reg to the dst slot, interleaved with the dispatch steps.
+    |   ins_next1
+    |  add RA, BASE, RA
+    |   ins_next2
+    |  vstr reg, [RA]
+    |   ins_next3
+    |.endmacro
+    |
+    |.macro ins_arithfallback, ins	// Apply ins to the vmeta_arith_* label that matches vk.
+    ||switch (vk) {
+    ||case 0:
+    |   ins ->vmeta_arith_vn
+    ||  break;
+    ||case 1:
+    |   ins ->vmeta_arith_nv
+    ||  break;
+    ||default:
+    |   ins ->vmeta_arith_vv
+    ||  break;
+    ||}
+    |.endmacro
+    |
+    |.macro ins_arithdn, intins, fpins, fpcall	// Integer fast path with a number path at 5.
+    |  ins_arithpre
+    |.if "intins" ~= "vm_modi" and not FPU
+    |   ins_next1
+    |.endif
+    |  ins_arithcheck_int >5	// Not both integers: try the FP variant.
+    |.if "intins" == "smull"
+    |  smull CARG1, RC, CARG3, CARG1	// 64 bit product in RC:CARG1.
+    |  cmp RC, CARG1, asr #31	// High word must equal the sign extension of the low word.
+    |  ins_arithfallback bne
+    |.elif "intins" == "vm_modi"
+    |  movs CARG2, CARG3	// Divisor to CARG2; zero divisor goes to the fallback.
+    |  ins_arithfallback beq
+    |  bl ->vm_modi
+    |  mvn CARG2, #~LJ_TISNUM	// Tag the result in CARG1 with LJ_TISNUM.
+    |.else
+    |  intins CARG1, CARG1, CARG3
+    |  ins_arithfallback bvs	// Signed overflow goes to the fallback.
+    |.endif
+    |4:
+    |.if "intins" == "vm_modi" or FPU
+    |   ins_next1
+    |.endif
+    |   ins_next2
+    |  strd CARG12, [BASE, RA]
+    |   ins_next3
+    |5:  // FP variant.
+    |  ins_arithpre_fpu d6, d7
+    |  ins_arithfallback ins_arithcheck_num	// Not both numbers: go to the fallback.
+    |.if FPU
+    |.if "intins" == "vm_modi"
+    |  bl fpcall
+    |.else
+    |  fpins d6, d6, d7
+    |.endif
+    |  ins_arithpost_fpu d6
+    |.else
+    |  bl fpcall
+    |.if "intins" ~= "vm_modi"
+    |  ins_next1
+    |.endif
+    |  b <4	// Store the result through the shared tail at 4.
+    |.endif
+    |.endmacro
+    |
+    |.macro ins_arithfp, fpins, fpcall	// Number-only arithmetic; fpins "extern" always calls fpcall.
+    |  ins_arithpre
+    |.if "fpins" ~= "extern" or HFABI
+    |  ins_arithpre_fpu d0, d1
+    |.endif
+    |  ins_arithfallback ins_arithcheck_num	// Not both numbers: go to the fallback.
+    |.if "fpins" == "extern"
+    |  .IOS mov RC, BASE
+    |  bl fpcall
+    |  .IOS mov BASE, RC
+    |.elif FPU
+    |  fpins d0, d0, d1
+    |.else
+    |  bl fpcall
+    |.endif
+    |.if ("fpins" ~= "extern" or HFABI) and FPU
+    |  ins_arithpost_fpu d0
+    |.else
+    |   ins_next1
+    |   ins_next2
+    |  strd CARG12, [BASE, RA]	// Result arrives in CARG1/CARG2 on this path.
+    |   ins_next3
+    |.endif
+    |.endmacro
+
+  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+    |  ins_arithdn adds, vadd.f64, extern __aeabi_dadd	// Args: integer op, FPU op, soft-float call.
+    break;
+  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+    |  ins_arithdn subs, vsub.f64, extern __aeabi_dsub
+    break;
+  case BC_MULVN: case BC_MULNV: case BC_MULVV:
+    |  ins_arithdn smull, vmul.f64, extern __aeabi_dmul	// "smull" selects the overflow-checked multiply.
+    break;
+  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+    |  ins_arithfp vdiv.f64, extern __aeabi_ddiv	// No integer path for division.
+    break;
+  case BC_MODVN: case BC_MODNV: case BC_MODVV:
+    |  ins_arithdn vm_modi, vm_mod, ->vm_mod	// Both the integer and the number path call helpers.
+    break;
+  case BC_POW:
+    |  // NYI: (partial) integer arithmetic.
+    |  ins_arithfp extern, extern pow	// Always calls the external pow.
+    break;
+
+  case BC_CAT:
+    |  decode_RB8 RC, INS
+    |   decode_RC8 RB, INS
+    |  // RA = dst*8, RC = src_start*8, RB = src_end*8  (note: RB/RC swapped!)
+    |  sub CARG3, RB, RC	// CARG3 = (src_end - src_start)*8.
+    |   str BASE, L->base
+    |  add CARG2, BASE, RB	// CARG2 = &slot[src_end].
+    |->BC_CAT_Z:
+    |  // RA = dst*8, RC = src_start*8, CARG2 = top-1
+    |  mov CARG1, L
+    |   str PC, SAVE_PC
+    |  lsr CARG3, CARG3, #3	// left = src_end - src_start.
+    |  bl extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
+    |  // Returns NULL (finished) or TValue * (metamethod).
+    |  ldr BASE, L->base	// Reload BASE after the call.
+    |  cmp CRET1, #0
+    |  bne ->vmeta_binop
+    |  ldrd CARG34, [BASE, RC]	// The result is taken from the src_start slot.
+    |   ins_next1
+    |   ins_next2
+    |  strd CARG34, [BASE, RA]		// Copy result to RA.
+    |   ins_next3
+    break;
+
+  /* -- Constant ops ------------------------------------------------------ */
+
+  case BC_KSTR:
+    |  // RA = dst*8, RC = str_const (~)
+    |  mvn RC, RC	// RC = ~RC, a negative index relative to KBASE.
+    |   ins_next1
+    |  ldr CARG1, [KBASE, RC, lsl #2]	// CARG1 = constant pointer.
+    |  mvn CARG2, #~LJ_TSTR	// CARG2 = LJ_TSTR type word.
+    |   ins_next2
+    |  strd CARG12, [BASE, RA]
+    |   ins_next3
+    break;
+  case BC_KCDATA:
+    |.if FFI
+    |  // RA = dst*8, RC = cdata_const (~)
+    |  mvn RC, RC	// RC = ~RC, a negative index relative to KBASE.
+    |   ins_next1
+    |  ldr CARG1, [KBASE, RC, lsl #2]	// CARG1 = constant pointer.
+    |  mvn CARG2, #~LJ_TCDATA	// CARG2 = LJ_TCDATA type word.
+    |   ins_next2
+    |  strd CARG12, [BASE, RA]
+    |   ins_next3
+    |.endif
+    break;
+  case BC_KSHORT:
+    |  // RA = dst*8, (RC = int16_literal)
+    |  mov CARG1, INS, asr #16			// Refetch sign-extended reg.
+    |  mvn CARG2, #~LJ_TISNUM
+    |   ins_next1
+    |   ins_next2
+    |

<TRUNCATED>