You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2015/07/29 01:40:01 UTC

[23/62] [abbrv] trafficserver git commit: TS-3783 TS-3030 Add luajit v2.0.4 as a subtree

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_bc.c
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_bc.c b/lib/luajit/src/lj_bc.c
new file mode 100644
index 0000000..a8f444c
--- /dev/null
+++ b/lib/luajit/src/lj_bc.c
@@ -0,0 +1,14 @@
+/*
+** Bytecode instruction modes.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_bc_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_bc.h"
+
+/* Bytecode offsets and bytecode instruction modes. */
+#include "lj_bcdef.h"
+

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_bc.h
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_bc.h b/lib/luajit/src/lj_bc.h
new file mode 100644
index 0000000..7436fab
--- /dev/null
+++ b/lib/luajit/src/lj_bc.h
@@ -0,0 +1,261 @@
+/*
+** Bytecode instruction format.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BC_H
+#define _LJ_BC_H
+
+#include "lj_def.h"
+#include "lj_arch.h"
+
+/* Bytecode instruction format, 32 bit wide, fields of 8 or 16 bit:
+**
+** +----+----+----+----+
+** | B  | C  | A  | OP | Format ABC
+** +----+----+----+----+
+** |    D    | A  | OP | Format AD
+** +--------------------
+** MSB               LSB
+**
+** In-memory instructions are always stored in host byte order.
+*/
+
+/* Operand ranges and related constants. */
+#define BCMAX_A		0xff	/* Largest value of the 8 bit A field. */
+#define BCMAX_B		0xff	/* Largest value of the 8 bit B field. */
+#define BCMAX_C		0xff	/* Largest value of the 8 bit C field. */
+#define BCMAX_D		0xffff	/* Largest value of the 16 bit D field. */
+#define BCBIAS_J	0x8000	/* Bias added to a jump offset when stored in D. */
+#define NO_REG		BCMAX_A	/* Same value as BCMAX_A; presumably "no register". */
+#define NO_JMP		(~(BCPos)0)	/* All bits of BCPos set; presumably "no jump". */
+
+/* Macros to get instruction fields from a 32 bit instruction word i. */
+#define bc_op(i)	((BCOp)((i)&0xff))	/* Bits 0-7. */
+#define bc_a(i)		((BCReg)(((i)>>8)&0xff))	/* Bits 8-15. */
+#define bc_b(i)		((BCReg)((i)>>24))	/* Bits 24-31. */
+#define bc_c(i)		((BCReg)(((i)>>16)&0xff))	/* Bits 16-23. */
+#define bc_d(i)		((BCReg)((i)>>16))	/* Bits 16-31 (overlays B and C). */
+#define bc_j(i)		((ptrdiff_t)bc_d(i)-BCBIAS_J)	/* D with the jump bias removed. */
+
+/* Macros to set instruction fields. p points to the instruction in memory. */
+#define setbc_byte(p, x, ofs) \
+  ((uint8_t *)(p))[LJ_ENDIAN_SELECT(ofs, 3-ofs)] = (uint8_t)(x)  /* Byte index is ofs or 3-ofs. */
+#define setbc_op(p, x)	setbc_byte(p, (x), 0)
+#define setbc_a(p, x)	setbc_byte(p, (x), 1)
+#define setbc_b(p, x)	setbc_byte(p, (x), 3)
+#define setbc_c(p, x)	setbc_byte(p, (x), 2)
+#define setbc_d(p, x) \
+  ((uint16_t *)(p))[LJ_ENDIAN_SELECT(1, 0)] = (uint16_t)(x)  /* Halfword index is 1 or 0. */
+#define setbc_j(p, x)	setbc_d(p, (BCPos)((int32_t)(x)+BCBIAS_J))  /* Stores x+BCBIAS_J in D. */
+
+/* Macros to compose a 32 bit instruction word from its fields. */
+#define BCINS_ABC(o, a, b, c) \
+  (((BCIns)(o))|((BCIns)(a)<<8)|((BCIns)(b)<<24)|((BCIns)(c)<<16))  /* Operands are not masked. */
+#define BCINS_AD(o, a, d) \
+  (((BCIns)(o))|((BCIns)(a)<<8)|((BCIns)(d)<<16))  /* Operands are not masked. */
+#define BCINS_AJ(o, a, j)	BCINS_AD(o, a, (BCPos)((int32_t)(j)+BCBIAS_J))  /* D = j+BCBIAS_J. */
+
+/* Bytecode instruction definition. Order matters, see below.
+**
+** (name, Amode, Bmode, Cmode or Dmode, metamethod)
+**
+** The opcode name suffixes specify the type for RB/RC or RD:
+** V = variable slot
+** S = string const
+** N = number const
+** P = primitive type (~itype)
+** B = unsigned byte literal
+** M = multiple args/results
+*/
+#define BCDEF(_) \
+  /* Comparison ops. ORDER OPR. */ \
+  _(ISLT,	var,	___,	var,	lt) \
+  _(ISGE,	var,	___,	var,	lt) \
+  _(ISLE,	var,	___,	var,	le) \
+  _(ISGT,	var,	___,	var,	le) \
+  \
+  _(ISEQV,	var,	___,	var,	eq) \
+  _(ISNEV,	var,	___,	var,	eq) \
+  _(ISEQS,	var,	___,	str,	eq) \
+  _(ISNES,	var,	___,	str,	eq) \
+  _(ISEQN,	var,	___,	num,	eq) \
+  _(ISNEN,	var,	___,	num,	eq) \
+  _(ISEQP,	var,	___,	pri,	eq) \
+  _(ISNEP,	var,	___,	pri,	eq) \
+  \
+  /* Unary test and copy ops. */ \
+  _(ISTC,	dst,	___,	var,	___) \
+  _(ISFC,	dst,	___,	var,	___) \
+  _(IST,	___,	___,	var,	___) \
+  _(ISF,	___,	___,	var,	___) \
+  \
+  /* Unary ops. */ \
+  _(MOV,	dst,	___,	var,	___) \
+  _(NOT,	dst,	___,	var,	___) \
+  _(UNM,	dst,	___,	var,	unm) \
+  _(LEN,	dst,	___,	var,	len) \
+  \
+  /* Binary ops. ORDER OPR. VV last, POW must be next. */ \
+  _(ADDVN,	dst,	var,	num,	add) \
+  _(SUBVN,	dst,	var,	num,	sub) \
+  _(MULVN,	dst,	var,	num,	mul) \
+  _(DIVVN,	dst,	var,	num,	div) \
+  _(MODVN,	dst,	var,	num,	mod) \
+  \
+  _(ADDNV,	dst,	var,	num,	add) \
+  _(SUBNV,	dst,	var,	num,	sub) \
+  _(MULNV,	dst,	var,	num,	mul) \
+  _(DIVNV,	dst,	var,	num,	div) \
+  _(MODNV,	dst,	var,	num,	mod) \
+  \
+  _(ADDVV,	dst,	var,	var,	add) \
+  _(SUBVV,	dst,	var,	var,	sub) \
+  _(MULVV,	dst,	var,	var,	mul) \
+  _(DIVVV,	dst,	var,	var,	div) \
+  _(MODVV,	dst,	var,	var,	mod) \
+  \
+  _(POW,	dst,	var,	var,	pow) \
+  _(CAT,	dst,	rbase,	rbase,	concat) \
+  \
+  /* Constant ops. */ \
+  _(KSTR,	dst,	___,	str,	___) \
+  _(KCDATA,	dst,	___,	cdata,	___) \
+  _(KSHORT,	dst,	___,	lits,	___) \
+  _(KNUM,	dst,	___,	num,	___) \
+  _(KPRI,	dst,	___,	pri,	___) \
+  _(KNIL,	base,	___,	base,	___) \
+  \
+  /* Upvalue and function ops. */ \
+  _(UGET,	dst,	___,	uv,	___) \
+  _(USETV,	uv,	___,	var,	___) \
+  _(USETS,	uv,	___,	str,	___) \
+  _(USETN,	uv,	___,	num,	___) \
+  _(USETP,	uv,	___,	pri,	___) \
+  _(UCLO,	rbase,	___,	jump,	___) \
+  _(FNEW,	dst,	___,	func,	gc) \
+  \
+  /* Table ops. */ \
+  _(TNEW,	dst,	___,	lit,	gc) \
+  _(TDUP,	dst,	___,	tab,	gc) \
+  _(GGET,	dst,	___,	str,	index) \
+  _(GSET,	var,	___,	str,	newindex) \
+  _(TGETV,	dst,	var,	var,	index) \
+  _(TGETS,	dst,	var,	str,	index) \
+  _(TGETB,	dst,	var,	lit,	index) \
+  _(TSETV,	var,	var,	var,	newindex) \
+  _(TSETS,	var,	var,	str,	newindex) \
+  _(TSETB,	var,	var,	lit,	newindex) \
+  _(TSETM,	base,	___,	num,	newindex) \
+  \
+  /* Calls and vararg handling. T = tail call. */ \
+  _(CALLM,	base,	lit,	lit,	call) \
+  _(CALL,	base,	lit,	lit,	call) \
+  _(CALLMT,	base,	___,	lit,	call) \
+  _(CALLT,	base,	___,	lit,	call) \
+  _(ITERC,	base,	lit,	lit,	call) \
+  _(ITERN,	base,	lit,	lit,	call) \
+  _(VARG,	base,	lit,	lit,	___) \
+  _(ISNEXT,	base,	___,	jump,	___) \
+  \
+  /* Returns. */ \
+  _(RETM,	base,	___,	lit,	___) \
+  _(RET,	rbase,	___,	lit,	___) \
+  _(RET0,	rbase,	___,	lit,	___) \
+  _(RET1,	rbase,	___,	lit,	___) \
+  \
+  /* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \
+  _(FORI,	base,	___,	jump,	___) \
+  _(JFORI,	base,	___,	jump,	___) \
+  \
+  _(FORL,	base,	___,	jump,	___) \
+  _(IFORL,	base,	___,	jump,	___) \
+  _(JFORL,	base,	___,	lit,	___) \
+  \
+  _(ITERL,	base,	___,	jump,	___) \
+  _(IITERL,	base,	___,	jump,	___) \
+  _(JITERL,	base,	___,	lit,	___) \
+  \
+  _(LOOP,	rbase,	___,	jump,	___) \
+  _(ILOOP,	rbase,	___,	jump,	___) \
+  _(JLOOP,	rbase,	___,	lit,	___) \
+  \
+  _(JMP,	rbase,	___,	jump,	___) \
+  \
+  /* Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func. */ \
+  _(FUNCF,	rbase,	___,	___,	___) \
+  _(IFUNCF,	rbase,	___,	___,	___) \
+  _(JFUNCF,	rbase,	___,	lit,	___) \
+  _(FUNCV,	rbase,	___,	___,	___) \
+  _(IFUNCV,	rbase,	___,	___,	___) \
+  _(JFUNCV,	rbase,	___,	lit,	___) \
+  _(FUNCC,	rbase,	___,	___,	___) \
+  _(FUNCCW,	rbase,	___,	___,	___)
+
+/* Bytecode opcode numbers: one BC_<name> per BCDEF entry, in table order. */
+typedef enum {
+#define BCENUM(name, ma, mb, mc, mt)	BC_##name,
+BCDEF(BCENUM)
+#undef BCENUM
+  BC__MAX  /* Follows the last opcode, so it equals the opcode count. */
+} BCOp;
+
+LJ_STATIC_ASSERT((int)BC_ISEQV+1 == (int)BC_ISNEV);  /* Pins the BCDEF order relied on elsewhere. */
+LJ_STATIC_ASSERT(((int)BC_ISEQV^1) == (int)BC_ISNEV);  /* EQ/NE variants differ only in bit 0. */
+LJ_STATIC_ASSERT(((int)BC_ISEQS^1) == (int)BC_ISNES);
+LJ_STATIC_ASSERT(((int)BC_ISEQN^1) == (int)BC_ISNEN);
+LJ_STATIC_ASSERT(((int)BC_ISEQP^1) == (int)BC_ISNEP);
+LJ_STATIC_ASSERT(((int)BC_ISLT^1) == (int)BC_ISGE);  /* LT/GE and LE/GT also differ in bit 0. */
+LJ_STATIC_ASSERT(((int)BC_ISLE^1) == (int)BC_ISGT);
+LJ_STATIC_ASSERT(((int)BC_ISLT^3) == (int)BC_ISGT);  /* XOR with 3 maps ISLT to ISGT. */
+LJ_STATIC_ASSERT((int)BC_IST-(int)BC_ISTC == (int)BC_ISF-(int)BC_ISFC);  /* Same distance copy -> test. */
+LJ_STATIC_ASSERT((int)BC_CALLT-(int)BC_CALL == (int)BC_CALLMT-(int)BC_CALLM);  /* Same distance to tail call. */
+LJ_STATIC_ASSERT((int)BC_CALLMT + 1 == (int)BC_CALLT);
+LJ_STATIC_ASSERT((int)BC_RETM + 1 == (int)BC_RET);
+LJ_STATIC_ASSERT((int)BC_FORL + 1 == (int)BC_IFORL);  /* I* variant is base+1, J* variant is base+2. */
+LJ_STATIC_ASSERT((int)BC_FORL + 2 == (int)BC_JFORL);
+LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL);
+LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL);
+LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP);
+LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP);
+LJ_STATIC_ASSERT((int)BC_FUNCF + 1 == (int)BC_IFUNCF);
+LJ_STATIC_ASSERT((int)BC_FUNCF + 2 == (int)BC_JFUNCF);
+LJ_STATIC_ASSERT((int)BC_FUNCV + 1 == (int)BC_IFUNCV);
+LJ_STATIC_ASSERT((int)BC_FUNCV + 2 == (int)BC_JFUNCV);
+
+/* This solves a circular dependency problem, change as needed. */
+#define FF_next_N	4  /* NOTE(review): hard-coded copy of a value defined elsewhere -- keep in sync. */
+
+/* Stack slots used by FORI/FORL, relative to operand A (offsets 0..3). */
+enum {
+  FORL_IDX, FORL_STOP, FORL_STEP, FORL_EXT  /* Presumably index, stop, step, external copy -- confirm. */
+};
+
+/* Bytecode operand modes, as named in the mode columns of BCDEF. ORDER BCMode */
+typedef enum {
+  BCMnone, BCMdst, BCMbase, BCMvar, BCMrbase, BCMuv,  /* Mode A must be <= 7 (stored in 3 bits). */
+  BCMlit, BCMlits, BCMpri, BCMnum, BCMstr, BCMtab, BCMfunc, BCMjump, BCMcdata,
+  BCM_max  /* Follows the last mode, so it equals the mode count. */
+} BCMode;
+#define BCM___		BCMnone  /* Lets a ___ column paste to BCMnone via BCM##x. */
+
+#define bcmode_a(op)	((BCMode)(lj_bc_mode[op] & 7))  /* Bits 0-2 of the mode word. */
+#define bcmode_b(op)	((BCMode)((lj_bc_mode[op]>>3) & 15))  /* Bits 3-6. */
+#define bcmode_c(op)	((BCMode)((lj_bc_mode[op]>>7) & 15))  /* Bits 7-10. */
+#define bcmode_d(op)	bcmode_c(op)  /* D shares the C mode field. */
+#define bcmode_hasd(op)	((lj_bc_mode[op] & (15<<3)) == (BCMnone<<3))  /* True when B mode is BCMnone. */
+#define bcmode_mm(op)	((MMS)(lj_bc_mode[op]>>11))  /* Bits 11 and up: metamethod. */
+
+#define BCMODE(name, ma, mb, mc, mm) \
+  (BCM##ma|(BCM##mb<<3)|(BCM##mc<<7)|(MM_##mm<<11)),  /* A: bits 0-2, B: 3-6, C/D: 7-10, MM: 11+. */
+#define BCMODE_FF	0  /* Mode word with every field zero. */
+
+static LJ_AINLINE int bc_isret(BCOp op)  /* Nonzero iff op is one of the four return opcodes. */
+{
+  return (op == BC_RETM || op == BC_RET || op == BC_RET0 || op == BC_RET1);
+}
+
+LJ_DATA const uint16_t lj_bc_mode[];  /* Packed operand modes, indexed by opcode (see bcmode_*). */
+LJ_DATA const uint16_t lj_bc_ofs[];  /* Presumably per-opcode offsets; defined outside this header. */
+
+#endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_bcdump.h
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_bcdump.h b/lib/luajit/src/lj_bcdump.h
new file mode 100644
index 0000000..812d0e1
--- /dev/null
+++ b/lib/luajit/src/lj_bcdump.h
@@ -0,0 +1,66 @@
+/*
+** Bytecode dump definitions.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BCDUMP_H
+#define _LJ_BCDUMP_H
+
+#include "lj_obj.h"
+#include "lj_lex.h"
+
+/* -- Bytecode dump format ------------------------------------------------ */
+
+/*
+** dump   = header proto+ 0U
+** header = ESC 'L' 'J' versionB flagsU [namelenU nameB*]
+** proto  = lengthU pdata
+** pdata  = phead bcinsW* uvdataH* kgc* knum* [debugB*]
+** phead  = flagsB numparamsB framesizeB numuvB numkgcU numknU numbcU
+**          [debuglenU [firstlineU numlineU]]
+** kgc    = kgctypeU { ktab | (loU hiU) | (rloU rhiU iloU ihiU) | strB* }
+** knum   = intU0 | (loU1 hiU)
+** ktab   = narrayU nhashU karray* khash*
+** karray = ktabk
+** khash  = ktabk ktabk
+** ktabk  = ktabtypeU { intU | (loU hiU) | strB* }
+**
+** B = 8 bit, H = 16 bit, W = 32 bit, U = ULEB128 of W, U0/U1 = ULEB128 of W+1
+*/
+
+/* Bytecode dump header: the three signature bytes ESC 'L' 'J'. */
+#define BCDUMP_HEAD1		0x1b  /* ESC */
+#define BCDUMP_HEAD2		0x4c  /* 'L' */
+#define BCDUMP_HEAD3		0x4a  /* 'J' */
+
+/* If you perform *any* kind of private modifications to the bytecode itself
+** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
+*/
+#define BCDUMP_VERSION		1  /* Version byte that follows the signature. */
+
+/* Compatibility flags (bit mask stored in the dump header). */
+#define BCDUMP_F_BE		0x01  /* Dump was written in big-endian byte order. */
+#define BCDUMP_F_STRIP		0x02  /* Debug info has been stripped. */
+#define BCDUMP_F_FFI		0x04  /* Dump needs the FFI. */
+
+#define BCDUMP_F_KNOWN		(BCDUMP_F_FFI*2-1)  /* Mask of all flags defined above (0x07). */
+
+/* Type codes for the GC constants of a prototype. Plus length for strings. */
+enum {
+  BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64,
+  BCDUMP_KGC_COMPLEX, BCDUMP_KGC_STR  /* Codes >= KGC_STR: string of length code-KGC_STR. */
+};
+
+/* Type codes for the keys/values of a constant table. */
+enum {
+  BCDUMP_KTAB_NIL, BCDUMP_KTAB_FALSE, BCDUMP_KTAB_TRUE,
+  BCDUMP_KTAB_INT, BCDUMP_KTAB_NUM, BCDUMP_KTAB_STR  /* Codes >= KTAB_STR: string of length code-KTAB_STR. */
+};
+
+/* -- Bytecode reader/writer ---------------------------------------------- */
+
+LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
+		       void *data, int strip);
+LJ_FUNC GCproto *lj_bcread(LexState *ls);  /* Returns the last prototype read from the dump. */
+
+#endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_bcread.c
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_bcread.c b/lib/luajit/src/lj_bcread.c
new file mode 100644
index 0000000..25859d2
--- /dev/null
+++ b/lib/luajit/src/lj_bcread.c
@@ -0,0 +1,476 @@
+/*
+** Bytecode reader.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_bcread_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lualib.h"
+#endif
+#include "lj_lex.h"
+#include "lj_bcdump.h"
+#include "lj_state.h"
+
+/* Reuse some lexer fields for our own purposes (ls->level and ls->lastline). */
+#define bcread_flags(ls)	ls->level
+#define bcread_swap(ls) \
+  ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE)  /* True if dump and host endianness differ. */
+#define bcread_oldtop(L, ls)	restorestack(L, ls->lastline)  /* Stack top saved by bcread_savetop. */
+#define bcread_savetop(L, ls, top) \
+  ls->lastline = (BCLine)savestack(L, (top))  /* Saved via savestack(), not as a raw pointer. */
+
+/* -- Input buffer handling ----------------------------------------------- */
+
+/* Throw reader error: push "<chunk>: <message>" and raise LUA_ERRSYNTAX. */
+static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
+{
+  lua_State *L = ls->L;
+  const char *name = ls->chunkarg;
+  if (*name == BCDUMP_HEAD1) name = "(binary)";  /* Chunk arg itself starts with ESC. */
+  else if (*name == '@' || *name == '=') name++;  /* Drop the leading marker character. */
+  lj_str_pushf(L, "%s: %s", name, err2msg(em));
+  lj_err_throw(L, LUA_ERRSYNTAX);
+}
+
+/* Resize input buffer: grow ls->sb by doubling until it can hold len bytes. */
+static void bcread_resize(LexState *ls, MSize len)
+{
+  if (ls->sb.sz < len) {  /* Nothing to do if it is already large enough. */
+    MSize sz = ls->sb.sz * 2;
+    while (len > sz) sz = sz * 2;  /* NOTE(review): would not terminate if sb.sz were 0 -- confirm. */
+    lj_str_resizebuf(ls->L, &ls->sb, sz);
+    /* Caveat: this may change ls->sb.buf which may affect ls->p. */
+  }
+}
+
+/* Refill buffer until len bytes are available; need != 0 makes EOF an error. */
+static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
+{
+  lua_assert(len != 0);
+  if (len > LJ_MAX_MEM || ls->current < 0)  /* Oversized request, or called again after EOF. */
+    bcread_error(ls, LJ_ERR_BCBAD);
+  do {
+    const char *buf;
+    size_t size;
+    if (ls->n) {  /* Copy remainder to buffer. */
+      if (ls->sb.n) {  /* Move down in buffer. */
+	lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n);
+	if (ls->n != ls->sb.n)  /* Equal counts mean the data is already at the start. */
+	  memmove(ls->sb.buf, ls->p, ls->n);
+      } else {  /* Copy from buffer provided by reader. */
+	bcread_resize(ls, len);
+	memcpy(ls->sb.buf, ls->p, ls->n);
+      }
+      ls->p = ls->sb.buf;
+    }
+    ls->sb.n = ls->n;  /* Nonzero from here on means unread data lives in ls->sb. */
+    buf = ls->rfunc(ls->L, ls->rdata, &size);  /* Get more data from reader. */
+    if (buf == NULL || size == 0) {  /* EOF? */
+      if (need) bcread_error(ls, LJ_ERR_BCBAD);
+      ls->current = -1;  /* Only bad if we get called again. */
+      break;
+    }
+    if (ls->sb.n) {  /* Append to buffer. */
+      MSize n = ls->sb.n + (MSize)size;  /* NOTE(review): size is narrowed to MSize unchecked. */
+      bcread_resize(ls, n < len ? len : n);
+      memcpy(ls->sb.buf + ls->sb.n, buf, size);
+      ls->n = ls->sb.n = n;
+      ls->p = ls->sb.buf;
+    } else {  /* Return buffer provided by reader. */
+      ls->n = (MSize)size;
+      ls->p = buf;
+    }
+  } while (ls->n < len);  /* Keep reading until the request is satisfied. */
+}
+
+/* Need a certain number of bytes: refill if short, EOF is an error. */
+static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
+{
+  if (LJ_UNLIKELY(ls->n < len))
+    bcread_fill(ls, len, 1);  /* need=1: bcread_fill throws on EOF. */
+}
+
+/* Want to read up to a certain number of bytes, but may need less. */
+static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
+{
+  if (LJ_UNLIKELY(ls->n < len))
+    bcread_fill(ls, len, 0);  /* need=0: EOF is tolerated, fewer bytes may be available. */
+}
+
+#define bcread_dec(ls)		check_exp(ls->n > 0, ls->n--)  /* Count one byte as consumed. */
+#define bcread_consume(ls, len)	check_exp(ls->n >= (len), ls->n -= (len))  /* Count len bytes as consumed. */
+
+/* Return memory block from buffer: pointer to the next len bytes, then skip them. */
+static uint8_t *bcread_mem(LexState *ls, MSize len)
+{
+  uint8_t *p = (uint8_t *)ls->p;
+  bcread_consume(ls, len);  /* Availability is only checked via check_exp here. */
+  ls->p = (char *)p + len;
+  return p;  /* Points into the input buffer; no copy is made. */
+}
+
+/* Copy memory block from buffer: copies the next len bytes to q and skips them. */
+static void bcread_block(LexState *ls, void *q, MSize len)
+{
+  memcpy(q, bcread_mem(ls, len), len);
+}
+
+/* Read byte from buffer, returned zero-extended to 32 bits. */
+static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
+{
+  bcread_dec(ls);
+  return (uint32_t)(uint8_t)*ls->p++;  /* Cast via uint8_t so the byte is never sign-extended. */
+}
+
+/* Read ULEB128 value from buffer: 7 bits per byte, low bits first, bit 7 = more. */
+static uint32_t bcread_uleb128(LexState *ls)
+{
+  const uint8_t *p = (const uint8_t *)ls->p;
+  uint32_t v = *p++;
+  if (LJ_UNLIKELY(v >= 0x80)) {  /* Continuation bit set: multi-byte value. */
+    int sh = 0;
+    v &= 0x7f;
+    do {
+     v |= ((*p & 0x7f) << (sh += 7));
+     bcread_dec(ls);
+   } while (*p++ >= 0x80);  /* NOTE(review): no limit on sh or on bytes read in this loop. */
+  }
+  bcread_dec(ls);  /* One more; total decrements equal the bytes read. */
+  ls->p = (char *)p;
+  return v;
+}
+
+/* Read top 32 bits of 33 bit ULEB128 value from buffer (bit 0 is discarded). */
+static uint32_t bcread_uleb128_33(LexState *ls)
+{
+  const uint8_t *p = (const uint8_t *)ls->p;
+  uint32_t v = (*p++ >> 1);  /* Drop bit 0 of the first byte. */
+  if (LJ_UNLIKELY(v >= 0x40)) {  /* Continuation bit of the first byte, after the shift. */
+    int sh = -1;  /* First byte contributed only 6 bits, so the next group lands at bit 6. */
+    v &= 0x3f;
+    do {
+     v |= ((*p & 0x7f) << (sh += 7));
+     bcread_dec(ls);
+   } while (*p++ >= 0x80);  /* NOTE(review): no limit on sh or on bytes read in this loop. */
+  }
+  bcread_dec(ls);  /* One more; total decrements equal the bytes read. */
+  ls->p = (char *)p;
+  return v;
+}
+
+/* -- Bytecode reader ----------------------------------------------------- */
+
+/* Read debug info of a prototype: copy sizedbg bytes, then byte-swap lineinfo. */
+static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
+{
+  void *lineinfo = (void *)proto_lineinfo(pt);
+  bcread_block(ls, lineinfo, sizedbg);
+  /* Swap lineinfo if the endianness differs. */
+  if (bcread_swap(ls) && pt->numline >= 256) {  /* No swap is done when numline < 256. */
+    MSize i, n = pt->sizebc-1;  /* Number of lineinfo entries swapped. */
+    if (pt->numline < 65536) {  /* 16 bit entries. */
+      uint16_t *p = (uint16_t *)lineinfo;
+      for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8));
+    } else {  /* 32 bit entries. */
+      uint32_t *p = (uint32_t *)lineinfo;
+      for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]);
+    }
+  }
+}
+
+/* Find pointer to varinfo: the byte after pt->sizeuv NUL-terminated uvinfo strings. */
+static const void *bcread_varinfo(GCproto *pt)
+{
+  const uint8_t *p = proto_uvinfo(pt);
+  MSize n = pt->sizeuv;
+  if (n) while (*p++ || --n) ;  /* Each zero byte ends one string; stop after the n-th. */
+  return p;
+}
+
+/* Read a single constant key/value of a template table into *o. */
+static void bcread_ktabk(LexState *ls, TValue *o)
+{
+  MSize tp = bcread_uleb128(ls);
+  if (tp >= BCDUMP_KTAB_STR) {  /* String: the type code also carries the length. */
+    MSize len = tp - BCDUMP_KTAB_STR;
+    const char *p = (const char *)bcread_mem(ls, len);
+    setstrV(ls->L, o, lj_str_new(ls->L, p, len));
+  } else if (tp == BCDUMP_KTAB_INT) {
+    setintV(o, (int32_t)bcread_uleb128(ls));
+  } else if (tp == BCDUMP_KTAB_NUM) {  /* Number: stored as two 32 bit words, lo then hi. */
+    o->u32.lo = bcread_uleb128(ls);
+    o->u32.hi = bcread_uleb128(ls);
+  } else {  /* nil, false or true. */
+    lua_assert(tp <= BCDUMP_KTAB_TRUE);
+    setitype(o, ~tp);  /* The internal type tag is the complement of the type code. */
+  }
+}
+
+/* Read a template table: narray array entries, then nhash key/value pairs. */
+static GCtab *bcread_ktab(LexState *ls)
+{
+  MSize narray = bcread_uleb128(ls);
+  MSize nhash = bcread_uleb128(ls);
+  GCtab *t = lj_tab_new(ls->L, narray, hsize2hbits(nhash));
+  if (narray) {  /* Read array entries. */
+    MSize i;
+    TValue *o = tvref(t->array);
+    for (i = 0; i < narray; i++, o++)  /* Fill array slots in order, starting at slot 0. */
+      bcread_ktabk(ls, o);
+  }
+  if (nhash) {  /* Read hash entries. */
+    MSize i;
+    for (i = 0; i < nhash; i++) {
+      TValue key;
+      bcread_ktabk(ls, &key);
+      lua_assert(!tvisnil(&key));
+      bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));  /* Read the value into the slot for key. */
+    }
+  }
+  return t;
+}
+
+/* Read GC constants of a prototype into the sizekgc GCRef slots just below pt->k. */
+static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
+{
+  MSize i;
+  GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;  /* Lowest slot; filled upwards. */
+  for (i = 0; i < sizekgc; i++, kr++) {
+    MSize tp = bcread_uleb128(ls);
+    if (tp >= BCDUMP_KGC_STR) {  /* String: the type code also carries the length. */
+      MSize len = tp - BCDUMP_KGC_STR;
+      const char *p = (const char *)bcread_mem(ls, len);
+      setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len)));
+    } else if (tp == BCDUMP_KGC_TAB) {
+      setgcref(*kr, obj2gco(bcread_ktab(ls)));
+#if LJ_HASFFI
+    } else if (tp != BCDUMP_KGC_CHILD) {  /* I64, U64 or COMPLEX cdata constant. */
+      CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE :
+		   tp == BCDUMP_KGC_I64 ? CTID_INT64 : CTID_UINT64;
+      CTSize sz = tp == BCDUMP_KGC_COMPLEX ? 16 : 8;
+      GCcdata *cd = lj_cdata_new_(ls->L, id, sz);
+      TValue *p = (TValue *)cdataptr(cd);
+      setgcref(*kr, obj2gco(cd));
+      p[0].u32.lo = bcread_uleb128(ls);
+      p[0].u32.hi = bcread_uleb128(ls);
+      if (tp == BCDUMP_KGC_COMPLEX) {  /* Second 8 bytes of the 16 byte complex value. */
+	p[1].u32.lo = bcread_uleb128(ls);
+	p[1].u32.hi = bcread_uleb128(ls);
+      }
+#endif
+    } else {  /* Child prototype: taken from the Lua stack, not from the dump. */
+      lua_State *L = ls->L;
+      lua_assert(tp == BCDUMP_KGC_CHILD);
+      if (L->top <= bcread_oldtop(L, ls))  /* Stack underflow? */
+	bcread_error(ls, LJ_ERR_BCBAD);
+      L->top--;  /* Pop the most recently pushed prototype. */
+      setgcref(*kr, obj2gco(protoV(L->top)));
+    }
+  }
+}
+
+/* Read number constants of a prototype into the sizekn TValues at pt->k. */
+static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
+{
+  MSize i;
+  TValue *o = mref(pt->k, TValue);
+  for (i = 0; i < sizekn; i++, o++) {
+    int isnum = (ls->p[0] & 1);  /* Bit 0 of the first byte: 1 = two-word number, 0 = integer. */
+    uint32_t lo = bcread_uleb128_33(ls);
+    if (isnum) {
+      o->u32.lo = lo;
+      o->u32.hi = bcread_uleb128(ls);  /* High word follows as a plain ULEB128. */
+    } else {
+      setintV(o, lo);
+    }
+  }
+}
+
+/* Read bytecode instructions: bc[0] is synthesized, bc[1..sizebc-1] come from the dump. */
+static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
+{
+  BCIns *bc = proto_bc(pt);
+  bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF,
+		   pt->framesize, 0);
+  bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
+  /* Swap bytecode instructions if the endianness differs. */
+  if (bcread_swap(ls)) {
+    MSize i;
+    for (i = 1; i < sizebc; i++) bc[i] = lj_bswap(bc[i]);  /* Skip bc[0]: it was built in host order. */
+  }
+}
+
+/* Read upvalue refs: sizeuv 16 bit entries copied to proto_uv(pt). */
+static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
+{
+  if (sizeuv) {
+    uint16_t *uv = proto_uv(pt);
+    bcread_block(ls, uv, sizeuv*2);  /* 2 bytes per entry. */
+    /* Swap upvalue refs if the endianness differs. */
+    if (bcread_swap(ls)) {
+      MSize i;
+      for (i = 0; i < sizeuv; i++)
+	uv[i] = (uint16_t)((uv[i] >> 8)|(uv[i] << 8));
+    }
+  }
+}
+
+/* Read a prototype. Returns NULL at the end-of-dump marker, throws on bad input. */
+static GCproto *bcread_proto(LexState *ls)
+{
+  GCproto *pt;
+  MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
+  MSize ofsk, ofsuv, ofsdbg;
+  MSize sizedbg = 0;
+  BCLine firstline = 0, numline = 0;
+  MSize len, startn;
+
+  /* Read length. */
+  if (ls->n > 0 && ls->p[0] == 0) {  /* Shortcut EOF. */
+    ls->n--; ls->p++;
+    return NULL;
+  }
+  bcread_want(ls, 5);  /* 5 bytes is the longest ULEB128 encoding of a 32 bit value. */
+  len = bcread_uleb128(ls);
+  if (!len) return NULL;  /* EOF */
+  bcread_need(ls, len);  /* Buffer the whole prototype before parsing it. */
+  startn = ls->n;
+
+  /* Read prototype header. */
+  flags = bcread_byte(ls);
+  numparams = bcread_byte(ls);
+  framesize = bcread_byte(ls);
+  sizeuv = bcread_byte(ls);
+  sizekgc = bcread_uleb128(ls);
+  sizekn = bcread_uleb128(ls);
+  sizebc = bcread_uleb128(ls) + 1;  /* +1 for the bc[0] slot that is not stored in the dump. */
+  if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
+    sizedbg = bcread_uleb128(ls);
+    if (sizedbg) {
+      firstline = bcread_uleb128(ls);
+      numline = bcread_uleb128(ls);
+    }
+  }
+
+  /* Calculate total size of prototype including all colocated arrays. */
+  sizept = (MSize)sizeof(GCproto) +  /* NOTE(review): counts from the dump are not range-checked here. */
+	   sizebc*(MSize)sizeof(BCIns) +
+	   sizekgc*(MSize)sizeof(GCRef);
+  sizept = (sizept + (MSize)sizeof(TValue)-1) & ~((MSize)sizeof(TValue)-1);  /* Round up to TValue size. */
+  ofsk = sizept; sizept += sizekn*(MSize)sizeof(TValue);
+  ofsuv = sizept; sizept += ((sizeuv+1)&~1)*2;  /* 2 bytes per upvalue, count rounded up to even. */
+  ofsdbg = sizept; sizept += sizedbg;
+
+  /* Allocate prototype object and initialize its fields. */
+  pt = (GCproto *)lj_mem_newgco(ls->L, (MSize)sizept);
+  pt->gct = ~LJ_TPROTO;
+  pt->numparams = (uint8_t)numparams;
+  pt->framesize = (uint8_t)framesize;
+  pt->sizebc = sizebc;
+  setmref(pt->k, (char *)pt + ofsk);
+  setmref(pt->uv, (char *)pt + ofsuv);
+  pt->sizekgc = 0;  /* Set to zero until fully initialized. */
+  pt->sizekn = sizekn;
+  pt->sizept = sizept;
+  pt->sizeuv = (uint8_t)sizeuv;
+  pt->flags = (uint8_t)flags;
+  pt->trace = 0;
+  setgcref(pt->chunkname, obj2gco(ls->chunkname));
+
+  /* Close potentially uninitialized gap between bc and kgc. */
+  *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(sizekgc+1)) = 0;
+
+  /* Read bytecode instructions and upvalue refs. */
+  bcread_bytecode(ls, pt, sizebc);
+  bcread_uv(ls, pt, sizeuv);
+
+  /* Read constants. */
+  bcread_kgc(ls, pt, sizekgc);
+  pt->sizekgc = sizekgc;  /* Publish the real count only after all slots are filled. */
+  bcread_knum(ls, pt, sizekn);
+
+  /* Read and initialize debug info. */
+  pt->firstline = firstline;
+  pt->numline = numline;
+  if (sizedbg) {
+    MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2);  /* 1, 2 or 4 bytes each. */
+    setmref(pt->lineinfo, (char *)pt + ofsdbg);
+    setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);  /* Upvalue info follows the line info. */
+    bcread_dbg(ls, pt, sizedbg);
+    setmref(pt->varinfo, bcread_varinfo(pt));
+  } else {
+    setmref(pt->lineinfo, NULL);
+    setmref(pt->uvinfo, NULL);
+    setmref(pt->varinfo, NULL);
+  }
+
+  if (len != startn - ls->n)  /* Bytes consumed must match the declared prototype length. */
+    bcread_error(ls, LJ_ERR_BCBAD);
+  return pt;
+}
+
+/* Read and check header of bytecode dump. Returns 1 if accepted, 0 if not. */
+static int bcread_header(LexState *ls)
+{
+  uint32_t flags;
+  bcread_want(ls, 3+5+5);  /* 3 header bytes plus up to two 5 byte ULEB128 values. */
+  if (bcread_byte(ls) != BCDUMP_HEAD2 ||
+      bcread_byte(ls) != BCDUMP_HEAD3 ||
+      bcread_byte(ls) != BCDUMP_VERSION) return 0;
+  bcread_flags(ls) = flags = bcread_uleb128(ls);
+  if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;  /* Reject dumps with unknown flag bits. */
+  if ((flags & BCDUMP_F_FFI)) {
+#if LJ_HASFFI
+    lua_State *L = ls->L;
+    if (!ctype_ctsG(G(L))) {
+      ptrdiff_t oldtop = savestack(L, L->top);
+      luaopen_ffi(L);  /* Load FFI library on-demand. */
+      L->top = restorestack(L, oldtop);  /* Discard whatever luaopen_ffi left on the stack. */
+    }
+#else
+    return 0;
+#endif
+  }
+  if ((flags & BCDUMP_F_STRIP)) {  /* Stripped dump: no name stored, use the chunk argument. */
+    ls->chunkname = lj_str_newz(ls->L, ls->chunkarg);
+  } else {
+    MSize len = bcread_uleb128(ls);
+    bcread_need(ls, len);
+    ls->chunkname = lj_str_new(ls->L, (const char *)bcread_mem(ls, len), len);
+  }
+  return 1;  /* Ok. */
+}
+
+/* Read a bytecode dump. Returns the last prototype read; throws on a bad dump. */
+GCproto *lj_bcread(LexState *ls)
+{
+  lua_State *L = ls->L;
+  lua_assert(ls->current == BCDUMP_HEAD1);
+  bcread_savetop(L, ls, L->top);
+  lj_str_resetbuf(&ls->sb);
+  /* Check for a valid bytecode dump header. */
+  if (!bcread_header(ls))
+    bcread_error(ls, LJ_ERR_BCFMT);
+  for (;;) {  /* Process all prototypes in the bytecode dump. */
+    GCproto *pt = bcread_proto(ls);
+    if (!pt) break;
+    setprotoV(L, L->top, pt);  /* Push it; a later prototype may pop it as a child constant. */
+    incr_top(L);
+  }
+  if ((int32_t)ls->n > 0 || L->top-1 != bcread_oldtop(L, ls))  /* Leftover input, or not exactly one proto left. */
+    bcread_error(ls, LJ_ERR_BCBAD);
+  /* Pop off last prototype. */
+  L->top--;
+  return protoV(L->top);
+}
+

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_bcwrite.c
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_bcwrite.c b/lib/luajit/src/lj_bcwrite.c
new file mode 100644
index 0000000..ff97450
--- /dev/null
+++ b/lib/luajit/src/lj_bcwrite.c
@@ -0,0 +1,396 @@
+/*
+** Bytecode writer.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_bcwrite_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_str.h"
+#include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
+#if LJ_HASJIT
+#include "lj_dispatch.h"
+#include "lj_jit.h"
+#endif
+#include "lj_bcdump.h"
+#include "lj_vm.h"
+
+/* Context for bytecode writer.
+** lj_bcwrite keeps one instance in a local variable for the whole dump and
+** hands its address to cpwriter through the protected call.
+*/
+typedef struct BCWriteCtx {
+  SBuf sb;			/* Output buffer; reset for the header and per prototype. */
+  lua_State *L;			/* Lua state used for buffer resizing and the callback. */
+  GCproto *pt;			/* Root prototype. */
+  lua_Writer wfunc;		/* Writer callback. */
+  void *wdata;			/* Writer callback data, passed through unchanged. */
+  int strip;			/* Strip debug info (non-zero = omit chunk name/line info). */
+  int status;			/* Status from writer callback; non-zero stops further calls. */
+} BCWriteCtx;
+
+/* -- Output buffer handling ---------------------------------------------- */
+
+/*
+** Grow the output buffer so that len more bytes fit behind the used part.
+** The size is doubled at least once and then repeatedly until it suffices.
+*/
+static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
+{
+  MSize newsz = ctx->sb.sz;
+  do {
+    newsz = newsz * 2;
+  } while (ctx->sb.n + len > newsz);
+  lj_str_resizebuf(ctx->L, &ctx->sb, newsz);
+}
+
+/*
+** Make sure len more bytes can be appended to the output buffer.
+** The slow path (bcwrite_resize) is only taken if the space is missing.
+*/
+static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
+{
+  MSize wanted = ctx->sb.n + len;
+  if (LJ_UNLIKELY(wanted > ctx->sb.sz))
+    bcwrite_resize(ctx, len);
+}
+
+/*
+** Append len bytes starting at p to the output buffer.
+** No capacity check is made here; space must have been reserved before.
+*/
+static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
+{
+  const uint8_t *src = (const uint8_t *)p;
+  uint8_t *dst = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
+  uint8_t *end = dst + len;
+  ctx->sb.n += len;
+  while (dst != end)
+    *dst++ = *src++;
+}
+
+/*
+** Append a single byte to the output buffer.
+** No capacity check is made here; space must have been reserved before.
+*/
+static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
+{
+  MSize pos = ctx->sb.n;
+  ctx->sb.buf[pos] = b;
+  ctx->sb.n = pos + 1;
+}
+
+/*
+** Append v to the output buffer in ULEB128 form: 7 bits per byte, least
+** significant group first, with bit 7 set on every byte except the last.
+** No capacity check is made here; space must have been reserved before.
+*/
+static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
+{
+  uint8_t *base = (uint8_t *)ctx->sb.buf;
+  uint8_t *out = base + ctx->sb.n;
+  while (v >= 0x80) {
+    *out++ = (uint8_t)((v & 0x7f) | 0x80);
+    v >>= 7;
+  }
+  *out++ = (uint8_t)v;
+  ctx->sb.n = (MSize)(out - base);
+}
+
+/* -- Bytecode writer ----------------------------------------------------- */
+
+/*
+** Write one constant key or value of a template table.
+** Strings are written as a ULEB128 tag of BCDUMP_KTAB_STR+len followed by
+** the string bytes, integers as BCDUMP_KTAB_INT plus a ULEB128 value,
+** numbers as BCDUMP_KTAB_NUM plus the lo and hi words, and primitives as a
+** single tag byte. With narrow set (and !LJ_DUALNUM) a number that converts
+** to an int32_t and back without change is written as an integer.
+*/
+static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
+{
+  bcwrite_need(ctx, 1+10);  /* Enough for every non-string case. */
+  if (tvisstr(o)) {
+    const GCstr *s = strV(o);
+    MSize slen = s->len;
+    bcwrite_need(ctx, 5+slen);
+    bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+slen);
+    bcwrite_block(ctx, strdata(s), slen);
+    return;
+  }
+  if (tvisint(o)) {
+    bcwrite_byte(ctx, BCDUMP_KTAB_INT);
+    bcwrite_uleb128(ctx, intV(o));
+    return;
+  }
+  if (!tvisnum(o)) {  /* Only primitives are left. */
+    lua_assert(tvispri(o));
+    bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o));
+    return;
+  }
+  if (!LJ_DUALNUM && narrow) {  /* Narrow number constants to integers. */
+    lua_Number num = numV(o);
+    int32_t k = lj_num2int(num);
+    if (num == (lua_Number)k) {  /* -0 is never a constant. */
+      bcwrite_byte(ctx, BCDUMP_KTAB_INT);
+      bcwrite_uleb128(ctx, k);
+      return;
+    }
+  }
+  bcwrite_byte(ctx, BCDUMP_KTAB_NUM);
+  bcwrite_uleb128(ctx, o->u32.lo);
+  bcwrite_uleb128(ctx, o->u32.hi);
+}
+
+/*
+** Write a template table.
+** Emits the used length of the array part and the number of occupied hash
+** slots as ULEB128 values, then the array entries (nil included) and the
+** key/value pairs of the occupied hash slots, last node first.
+** Space for the two counts is not reserved in this function.
+*/
+static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
+{
+  MSize narray = 0, nhash = 0, i;
+  if (t->asize > 0) {  /* Trim trailing nil slots off the array part. */
+    TValue *arr = tvref(t->array);
+    narray = t->asize;
+    while (narray > 0 && tvisnil(&arr[narray-1]))
+      narray--;
+  }
+  if (t->hmask > 0) {  /* Count hash slots holding a non-nil value. */
+    Node *nodes = noderef(t->node);
+    for (i = 0; i <= t->hmask; i++)
+      if (!tvisnil(&nodes[i].val))
+	nhash++;
+  }
+  /* Write number of array slots and hash slots. */
+  bcwrite_uleb128(ctx, narray);
+  bcwrite_uleb128(ctx, nhash);
+  if (narray) {  /* Array entries, values may be narrowed. */
+    TValue *arr = tvref(t->array);
+    for (i = 0; i < narray; i++)
+      bcwrite_ktabk(ctx, &arr[i], 1);
+  }
+  if (nhash) {  /* Hash entries: scan downwards until all are written. */
+    Node *nodes = noderef(t->node);
+    MSize left = nhash;
+    for (i = t->hmask; ; i--) {
+      Node *nd = &nodes[i];
+      if (tvisnil(&nd->val))
+	continue;
+      bcwrite_ktabk(ctx, &nd->key, 0);  /* Keys are never narrowed. */
+      bcwrite_ktabk(ctx, &nd->val, 1);
+      if (--left == 0)
+	break;
+    }
+  }
+}
+
+/* Write GC constants of a prototype.
+** The GC constant slots sit directly below pt->k; the loop starts sizekgc
+** slots below it and walks upwards. Each constant is written as a ULEB128
+** type tag followed by type-specific data. For strings the tag also carries
+** the length (BCDUMP_KGC_STR + len). Child prototypes only get a tag here;
+** bcwrite_proto passes them to the writer separately.
+*/
+static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
+{
+  MSize i, sizekgc = pt->sizekgc;
+  GCRef *kr = mref(pt->k, GCRef) - (ptrdiff_t)sizekgc;
+  for (i = 0; i < sizekgc; i++, kr++) {
+    GCobj *o = gcref(*kr);
+    MSize tp, need = 1;
+    /* Determine constant type and needed size. */
+    if (o->gch.gct == ~LJ_TSTR) {
+      tp = BCDUMP_KGC_STR + gco2str(o)->len;
+      need = 5+gco2str(o)->len;  /* Up to 5 tag bytes plus the string data. */
+    } else if (o->gch.gct == ~LJ_TPROTO) {
+      lua_assert((pt->flags & PROTO_CHILD));
+      tp = BCDUMP_KGC_CHILD;
+#if LJ_HASFFI
+    } else if (o->gch.gct == ~LJ_TCDATA) {
+      CTypeID id = gco2cd(o)->ctypeid;
+      need = 1+4*5;  /* Tag plus up to four ULEB128 words (complex case). */
+      if (id == CTID_INT64) {
+	tp = BCDUMP_KGC_I64;
+      } else if (id == CTID_UINT64) {
+	tp = BCDUMP_KGC_U64;
+      } else {
+	lua_assert(id == CTID_COMPLEX_DOUBLE);
+	tp = BCDUMP_KGC_COMPLEX;
+      }
+#endif
+    } else {
+      lua_assert(o->gch.gct == ~LJ_TTAB);
+      tp = BCDUMP_KGC_TAB;
+      need = 1+2*5;  /* Tag plus the two counts written by bcwrite_ktab. */
+    }
+    /* Write constant type. */
+    bcwrite_need(ctx, need);
+    bcwrite_uleb128(ctx, tp);
+    /* Write constant data (if any). */
+    if (tp >= BCDUMP_KGC_STR) {  /* Every tag from BCDUMP_KGC_STR up is a string. */
+      bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len);
+    } else if (tp == BCDUMP_KGC_TAB) {
+      bcwrite_ktab(ctx, gco2tab(o));
+#if LJ_HASFFI
+    } else if (tp != BCDUMP_KGC_CHILD) {  /* I64, U64 or COMPLEX cdata. */
+      cTValue *p = (TValue *)cdataptr(gco2cd(o));
+      bcwrite_uleb128(ctx, p[0].u32.lo);
+      bcwrite_uleb128(ctx, p[0].u32.hi);
+      if (tp == BCDUMP_KGC_COMPLEX) {  /* Second 64 bit half of the payload. */
+	bcwrite_uleb128(ctx, p[1].u32.lo);
+	bcwrite_uleb128(ctx, p[1].u32.hi);
+      }
+#endif
+    }
+  }
+}
+
+/* Write number constants of a prototype.
+** Each constant starts with a 33 bit ULEB128 value whose lowest bit tells
+** the kind: 0 for an integer (the value itself follows in the upper bits),
+** 1 for a double (low word in the upper bits, then the high word as a
+** separate ULEB128). bcwrite_uleb128 only takes 32 bits, so a word with bit
+** 31 set is written with that bit OR'ed in to force a five byte encoding
+** and the last byte is then patched to hold bits 30 and 31 of the word.
+*/
+static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
+{
+  MSize i, sizekn = pt->sizekn;
+  cTValue *o = mref(pt->k, TValue);
+  bcwrite_need(ctx, 10*sizekn);  /* At most two 5 byte values per constant. */
+  for (i = 0; i < sizekn; i++, o++) {
+    int32_t k;
+    if (tvisint(o)) {
+      k = intV(o);
+      goto save_int;  /* Shares the integer writer inside the else branch. */
+    } else {
+      /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
+      if (!LJ_DUALNUM) {  /* Narrow number constants to integers. */
+	lua_Number num = numV(o);
+	k = lj_num2int(num);
+	if (num == (lua_Number)k) {  /* -0 is never a constant. */
+	save_int:
+	  bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u));
+	  if (k < 0) {  /* Bit 31 set: fix up the 5th byte of the encoding. */
+	    char *p = &ctx->sb.buf[ctx->sb.n-1];
+	    *p = (*p & 7) | ((k>>27) & 0x18);
+	  }
+	  continue;
+	}
+      }
+      bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
+      if (o->u32.lo >= 0x80000000u) {  /* Same fix-up as for negative ints. */
+	char *p = &ctx->sb.buf[ctx->sb.n-1];
+	*p = (*p & 7) | ((o->u32.lo>>27) & 0x18);
+      }
+      bcwrite_uleb128(ctx, o->u32.hi);
+    }
+  }
+}
+
+/* Write bytecode instructions.
+** Copies the sizebc-1 instructions after the function header byte-for-byte
+** into the output buffer. No buffer space is reserved in this function.
+** With LJ_HASJIT the copy in the buffer (never the prototype itself) is
+** then patched back to the unmodified opcodes.
+*/
+static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt)
+{
+  MSize nbc = pt->sizebc-1;  /* Omit the [JI]FUNC* header. */
+#if LJ_HASJIT
+  uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n];  /* Start of the copy made below. */
+#endif
+  bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+#if LJ_HASJIT
+  /* Unpatch modified bytecode containing ILOOP/JLOOP etc.
+  ** The opcode is the least significant byte of an instruction and the D
+  ** operand its upper 16 bits; LJ_ENDIAN_SELECT presumably picks the first
+  ** index on little-endian and the second on big-endian hosts (TODO confirm).
+  ** The opcode arithmetic relies on the layout of the BCOp enum, which is
+  ** not visible here.
+  */
+  if ((pt->flags & PROTO_ILOOP) || pt->trace) {
+    jit_State *J = L2J(ctx->L);
+    MSize i;
+    for (i = 0; i < nbc; i++, p += sizeof(BCIns)) {
+      BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)];
+      if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
+	  op == BC_JFORI) {
+	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
+      } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
+	BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8);  /* D operand, used as trace reference. */
+	BCIns ins = traceref(J, rd)->startins;  /* Instruction saved in the trace. */
+	p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_JFORL+BC_FORL);
+	p[LJ_ENDIAN_SELECT(2, 1)] = bc_c(ins);  /* Low byte of the saved D. */
+	p[LJ_ENDIAN_SELECT(3, 0)] = bc_b(ins);  /* High byte of the saved D. */
+      }
+    }
+  }
+#endif
+}
+
+/* Write prototype.
+** Child prototypes are written first (recursively), so each of them reaches
+** the writer callback before its parent. The prototype is then assembled in
+** ctx->sb as: header bytes, ULEB128 sizes, optional debug sizes, bytecode,
+** upvalue refs, GC constants, number constants and optional debug info.
+** The buffer is handed to the writer callback, prefixed by its ULEB128
+** length, only if no earlier callback reported a non-zero status.
+*/
+static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
+{
+  MSize sizedbg = 0;
+
+  /* Recursively write children of prototype.
+  ** The GC constants are scanned from the slot next to pt->k downwards,
+  ** i.e. in the opposite direction to bcwrite_kgc.
+  */
+  if ((pt->flags & PROTO_CHILD)) {
+    ptrdiff_t i, n = pt->sizekgc;
+    GCRef *kr = mref(pt->k, GCRef) - 1;
+    for (i = 0; i < n; i++, kr--) {
+      GCobj *o = gcref(*kr);
+      if (o->gch.gct == ~LJ_TPROTO)
+	bcwrite_proto(ctx, gco2pt(o));
+    }
+  }
+
+  /* Start writing the prototype info to a buffer. */
+  lj_str_resetbuf(&ctx->sb);
+  ctx->sb.n = 5;  /* Leave room for final size. */
+  bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+
+  /* Write prototype header. */
+  bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)));
+  bcwrite_byte(ctx, pt->numparams);
+  bcwrite_byte(ctx, pt->framesize);
+  bcwrite_byte(ctx, pt->sizeuv);
+  bcwrite_uleb128(ctx, pt->sizekgc);
+  bcwrite_uleb128(ctx, pt->sizekn);
+  bcwrite_uleb128(ctx, pt->sizebc-1);
+  if (!ctx->strip) {
+    if (proto_lineinfo(pt))  /* Debug info runs from lineinfo to the end of pt. */
+      sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
+    bcwrite_uleb128(ctx, sizedbg);
+    if (sizedbg) {
+      bcwrite_uleb128(ctx, pt->firstline);
+      bcwrite_uleb128(ctx, pt->numline);
+    }
+  }
+
+  /* Write bytecode instructions and upvalue refs. */
+  bcwrite_bytecode(ctx, pt);
+  bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2);
+
+  /* Write constants. */
+  bcwrite_kgc(ctx, pt);
+  bcwrite_knum(ctx, pt);
+
+  /* Write debug info, if not stripped. */
+  if (sizedbg) {
+    bcwrite_need(ctx, sizedbg);
+    bcwrite_block(ctx, proto_lineinfo(pt), sizedbg);
+  }
+
+  /* Pass buffer to writer function. */
+  if (ctx->status == 0) {
+    MSize n = ctx->sb.n - 5;  /* Payload size without the reserved prefix. */
+    MSize nn = (lj_fls(n)+8)*9 >> 6;  /* ULEB128 length of n, assuming lj_fls yields the top bit index. */
+    ctx->sb.n = 5 - nn;  /* Right-align the size directly before the payload. */
+    bcwrite_uleb128(ctx, n);  /* Fill in final size. */
+    lua_assert(ctx->sb.n == 5);
+    ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata);
+  }
+}
+
+/*
+** Write the header of a bytecode dump and pass it to the writer callback.
+** The header consists of the three signature bytes, the version byte, one
+** flags byte and, unless stripping is requested, the ULEB128 length and the
+** bytes of the root prototype's chunk name.
+** The callback's return value is stored in ctx->status.
+*/
+static void bcwrite_header(BCWriteCtx *ctx)
+{
+  GCstr *cname = proto_chunkname(ctx->pt);
+  MSize namelen = cname->len;
+  const char *namedata = strdata(cname);
+  int dumpflags = 0;
+  if (ctx->strip) dumpflags += BCDUMP_F_STRIP;
+  if (LJ_BE) dumpflags += BCDUMP_F_BE;
+  if ((ctx->pt->flags & PROTO_FFI)) dumpflags += BCDUMP_F_FFI;
+  lj_str_resetbuf(&ctx->sb);
+  bcwrite_need(ctx, 5+5+namelen);
+  bcwrite_byte(ctx, BCDUMP_HEAD1);
+  bcwrite_byte(ctx, BCDUMP_HEAD2);
+  bcwrite_byte(ctx, BCDUMP_HEAD3);
+  bcwrite_byte(ctx, BCDUMP_VERSION);
+  bcwrite_byte(ctx, dumpflags);
+  if (!ctx->strip) {  /* The chunk name is debug info, too. */
+    bcwrite_uleb128(ctx, namelen);
+    bcwrite_block(ctx, namedata, namelen);
+  }
+  ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata);
+}
+
+/*
+** Write the footer of a bytecode dump: a single zero byte.
+** Nothing is written if an earlier writer callback reported a status.
+*/
+static void bcwrite_footer(BCWriteCtx *ctx)
+{
+  uint8_t endmark = 0;
+  if (ctx->status != 0)
+    return;
+  ctx->status = ctx->wfunc(ctx->L, &endmark, 1, ctx->wdata);
+}
+
+/*
+** Protected callback for the bytecode writer (run via lj_vm_cpcall).
+** Writes header, root prototype (including children) and footer, in that
+** order. ud is the BCWriteCtx set up by lj_bcwrite. Always returns NULL.
+*/
+static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
+{
+  BCWriteCtx *wctx = (BCWriteCtx *)ud;
+  UNUSED(dummy);
+  /* A 1K start size avoids resizes for most prototypes. */
+  lj_str_resizebuf(L, &wctx->sb, 1024);
+  bcwrite_header(wctx);
+  bcwrite_proto(wctx, wctx->pt);
+  bcwrite_footer(wctx);
+  return NULL;
+}
+
+/*
+** Write the bytecode for a prototype through a writer callback.
+** The actual work is done by cpwriter inside a protected call. The output
+** buffer is freed afterwards in any case.
+** Returns the status of the protected call if that is non-zero, otherwise
+** the last status stored from the writer callback.
+*/
+int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
+	      int strip)
+{
+  BCWriteCtx ctx;
+  int cpstatus;
+  lj_str_initbuf(&ctx.sb);
+  ctx.status = 0;
+  ctx.strip = strip;
+  ctx.wdata = data;
+  ctx.wfunc = writer;
+  ctx.pt = pt;
+  ctx.L = L;
+  cpstatus = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
+  lj_str_freebuf(G(ctx.L), &ctx.sb);
+  return cpstatus != 0 ? cpstatus : ctx.status;
+}
+

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_carith.c
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_carith.c b/lib/luajit/src/lj_carith.c
new file mode 100644
index 0000000..2a358a9
--- /dev/null
+++ b/lib/luajit/src/lj_carith.c
@@ -0,0 +1,353 @@
+/*
+** C data arithmetic.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "lj_obj.h"
+
+#if LJ_HASFFI
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_tab.h"
+#include "lj_meta.h"
+#include "lj_ctype.h"
+#include "lj_cconv.h"
+#include "lj_cdata.h"
+#include "lj_carith.h"
+
+/* -- C data arithmetic --------------------------------------------------- */
+
+/* Binary operands of an operator converted to ctypes.
+** Index 0 is the operand at L->base, index 1 the one at L->base+1.
+** Filled in by carith_checkarg.
+*/
+typedef struct CDArith {
+  uint8_t *p[2];  /* Pointer to the data of each operand (or a marker value). */
+  CType *ct[2];  /* C type of each operand; NULL if it has no usable ctype. */
+} CDArith;
+
+/* Check arguments for arithmetic metamethods.
+** Classifies the two operands at L->base[0] and L->base[1] and stores, for
+** each, a ctype in ca->ct[i] and a pointer to the operand's data in ca->p[i].
+** Returns 1 if both operands got a usable ctype and 0 otherwise; in the
+** failure case ca->p[] and ca->ct[] are still set for both operands, which
+** lj_carith_meta uses for its pointer comparison and error message.
+** Calls lj_err_argt if fewer than two operands are on the stack.
+*/
+static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca)
+{
+  TValue *o = L->base;
+  int ok = 1;
+  MSize i;
+  if (o+1 >= L->top)
+    lj_err_argt(L, 1, LUA_TCDATA);
+  for (i = 0; i < 2; i++, o++) {
+    if (tviscdata(o)) {
+      GCcdata *cd = cdataV(o);
+      CTypeID id = (CTypeID)cd->ctypeid;
+      CType *ct = ctype_raw(cts, id);
+      uint8_t *p = (uint8_t *)cdataptr(cd);
+      if (ctype_isptr(ct->info)) {  /* Use the pointer value, not its storage. */
+	p = (uint8_t *)cdata_getptr(p, ct->size);
+	if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
+      } else if (ctype_isfunc(ct->info)) {  /* Treat a function as a pointer to it. */
+	p = (uint8_t *)*(void **)p;
+	ct = ctype_get(cts,
+	  lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
+      }
+      if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);  /* Enum -> its child type. */
+      ca->ct[i] = ct;
+      ca->p[i] = p;
+    } else if (tvisint(o)) {
+      ca->ct[i] = ctype_get(cts, CTID_INT32);
+      ca->p[i] = (uint8_t *)&o->i;
+    } else if (tvisnum(o)) {
+      ca->ct[i] = ctype_get(cts, CTID_DOUBLE);
+      ca->p[i] = (uint8_t *)&o->n;
+    } else if (tvisnil(o)) {  /* nil acts as a NULL void pointer. */
+      ca->ct[i] = ctype_get(cts, CTID_P_VOID);
+      ca->p[i] = (uint8_t *)0;
+    } else if (tvisstr(o)) {  /* A string may name a constant of the other operand's enum. */
+      TValue *o2 = i == 0 ? o+1 : o-1;
+      CType *ct = ctype_raw(cts, cdataV(o2)->ctypeid);  /* NOTE(review): assumes o2 is cdata; not checked here. */
+      ca->ct[i] = NULL;
+      ca->p[i] = (uint8_t *)strVdata(o);
+      ok = 0;
+      if (ctype_isenum(ct->info)) {
+	CTSize ofs;
+	CType *cct = lj_ctype_getfield(cts, ct, strV(o), &ofs);
+	if (cct && ctype_isconstval(cct->info)) {
+	  ca->ct[i] = ctype_child(cts, cct);
+	  ca->p[i] = (uint8_t *)&cct->size;  /* Assumes ct does not grow. */
+	  ok = 1;
+	} else {
+	  ca->ct[1-i] = ct;  /* Use enum to improve error message. */
+	  ca->p[1-i] = NULL;
+	  break;
+	}
+      }
+    } else {
+      ca->ct[i] = NULL;
+      ca->p[i] = (void *)(intptr_t)1;  /* To make it unequal. */
+      ok = 0;
+    }
+  }
+  return ok;
+}
+
+/* Pointer arithmetic.
+** Handles pointer/array (op) pointer/array for sub, eq, lt and le, as well
+** as pointer/array +/- number and number + pointer/array.
+** Stores the result at L->top-1 and returns 1. Returns 0 if the operands
+** and operator do not form a supported pointer operation (this includes
+** incompatible pointer types and unusable element sizes).
+*/
+static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
+{
+  CType *ctp = ca->ct[0];
+  uint8_t *pp = ca->p[0];
+  ptrdiff_t idx;
+  CTSize sz;
+  CTypeID id;
+  GCcdata *cd;
+  if (ctype_isptr(ctp->info) || ctype_isrefarray(ctp->info)) {
+    if ((mm == MM_sub || mm == MM_eq || mm == MM_lt || mm == MM_le) &&
+	(ctype_isptr(ca->ct[1]->info) || ctype_isrefarray(ca->ct[1]->info))) {
+      uint8_t *pp2 = ca->p[1];
+      if (mm == MM_eq) {  /* Pointer equality. Incompatible pointers are ok. */
+	setboolV(L->top-1, (pp == pp2));
+	return 1;
+      }
+      if (!lj_cconv_compatptr(cts, ctp, ca->ct[1], CCF_IGNQUAL))
+	return 0;
+      if (mm == MM_sub) {  /* Pointer difference. */
+	intptr_t diff;
+	sz = lj_ctype_size(cts, ctype_cid(ctp->info));  /* Element size. */
+	if (sz == 0 || sz == CTSIZE_INVALID)
+	  return 0;
+	diff = ((intptr_t)pp - (intptr_t)pp2) / (int32_t)sz;
+	/* All valid pointer differences on x64 are in (-2^47, +2^47),
+	** which fits into a double without loss of precision.
+	** NOTE(review): the int32_t cast below keeps only the low 32 bits of
+	** diff, which is narrower than the range named above -- confirm that
+	** this is intended.
+	*/
+	setintptrV(L->top-1, (int32_t)diff);
+	return 1;
+      } else if (mm == MM_lt) {  /* Pointer comparison (unsigned). */
+	setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2));
+	return 1;
+      } else {
+	lua_assert(mm == MM_le);
+	setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2));
+	return 1;
+      }
+    }
+    if (!((mm == MM_add || mm == MM_sub) && ctype_isnum(ca->ct[1]->info)))
+      return 0;
+    lj_cconv_ct_ct(cts, ctype_get(cts, CTID_INT_PSZ), ca->ct[1],
+		   (uint8_t *)&idx, ca->p[1], 0);
+    if (mm == MM_sub) idx = -idx;  /* ptr - n is computed as ptr + (-n). */
+  } else if (mm == MM_add && ctype_isnum(ctp->info) &&
+      (ctype_isptr(ca->ct[1]->info) || ctype_isrefarray(ca->ct[1]->info))) {
+    /* Swap pointer and index. */
+    ctp = ca->ct[1]; pp = ca->p[1];
+    lj_cconv_ct_ct(cts, ctype_get(cts, CTID_INT_PSZ), ca->ct[0],
+		   (uint8_t *)&idx, ca->p[0], 0);
+  } else {
+    return 0;
+  }
+  sz = lj_ctype_size(cts, ctype_cid(ctp->info));  /* Element size. */
+  if (sz == CTSIZE_INVALID)
+    return 0;
+  pp += idx*(int32_t)sz;  /* Compute pointer + index. */
+  id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)),
+		       CTSIZE_PTR);  /* Result type: pointer to the element type. */
+  cd = lj_cdata_new(cts, id, CTSIZE_PTR);
+  *(uint8_t **)cdataptr(cd) = pp;
+  setcdataV(L, L->top-1, cd);
+  lj_gc_check(L);
+  return 1;
+}
+
+/* 64 bit integer arithmetic.
+** Applies if both operands are numeric ctypes of at most 8 bytes. Both are
+** converted to uint64_t if either one is an unsigned 8 byte type and to
+** int64_t otherwise. Comparisons store a boolean at L->top-1; all other
+** operators store a new 8 byte cdata of the chosen type there.
+** Returns 1 if the operation was handled and 0 if the operand types do not
+** qualify.
+*/
+static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
+{
+  if (ctype_isnum(ca->ct[0]->info) && ca->ct[0]->size <= 8 &&
+      ctype_isnum(ca->ct[1]->info) && ca->ct[1]->size <= 8) {
+    CTypeID id = (((ca->ct[0]->info & CTF_UNSIGNED) && ca->ct[0]->size == 8) ||
+		  ((ca->ct[1]->info & CTF_UNSIGNED) && ca->ct[1]->size == 8)) ?
+		 CTID_UINT64 : CTID_INT64;
+    CType *ct = ctype_get(cts, id);
+    GCcdata *cd;
+    uint64_t u0, u1, *up;
+    lj_cconv_ct_ct(cts, ct, ca->ct[0], (uint8_t *)&u0, ca->p[0], 0);
+    if (mm != MM_unm)  /* Unary minus only uses the first operand; u1 stays unset. */
+      lj_cconv_ct_ct(cts, ct, ca->ct[1], (uint8_t *)&u1, ca->p[1], 0);
+    switch (mm) {
+    case MM_eq:
+      setboolV(L->top-1, (u0 == u1));
+      return 1;
+    case MM_lt:
+      setboolV(L->top-1,
+	       id == CTID_INT64 ? ((int64_t)u0 < (int64_t)u1) : (u0 < u1));
+      return 1;
+    case MM_le:
+      setboolV(L->top-1,
+	       id == CTID_INT64 ? ((int64_t)u0 <= (int64_t)u1) : (u0 <= u1));
+      return 1;
+    default: break;
+    }
+    cd = lj_cdata_new(cts, id, 8);
+    up = (uint64_t *)cdataptr(cd);
+    setcdataV(L, L->top-1, cd);  /* Result slot is set first, payload filled below. */
+    switch (mm) {
+    case MM_add: *up = u0 + u1; break;
+    case MM_sub: *up = u0 - u1; break;
+    case MM_mul: *up = u0 * u1; break;
+    case MM_div:
+      if (id == CTID_INT64)
+	*up = (uint64_t)lj_carith_divi64((int64_t)u0, (int64_t)u1);
+      else
+	*up = lj_carith_divu64(u0, u1);
+      break;
+    case MM_mod:
+      if (id == CTID_INT64)
+	*up = (uint64_t)lj_carith_modi64((int64_t)u0, (int64_t)u1);
+      else
+	*up = lj_carith_modu64(u0, u1);
+      break;
+    case MM_pow:
+      if (id == CTID_INT64)
+	*up = (uint64_t)lj_carith_powi64((int64_t)u0, (int64_t)u1);
+      else
+	*up = lj_carith_powu64(u0, u1);
+      break;
+    case MM_unm: *up = (uint64_t)-(int64_t)u0; break;
+    default: lua_assert(0); break;
+    }
+    lj_gc_check(L);
+    return 1;
+  }
+  return 0;
+}
+
+/* Handle ctype arithmetic metamethods.
+** Looks up metamethod mm for the ctype of the first operand (for pointers:
+** the pointed-to type) and, failing that, for the second operand. If one is
+** found the call is continued via lj_meta_tailcall. Without a metamethod,
+** MM_eq yields the result of comparing ca->p[0] and ca->p[1]; every other
+** operator reports an error built from a description of both operands.
+** NOTE(review): lj_err_callerv presumably does not return -- confirm.
+*/
+static int lj_carith_meta(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
+{
+  cTValue *tv = NULL;
+  if (tviscdata(L->base)) {
+    CTypeID id = cdataV(L->base)->ctypeid;
+    CType *ct = ctype_raw(cts, id);
+    if (ctype_isptr(ct->info)) id = ctype_cid(ct->info);
+    tv = lj_ctype_meta(cts, id, mm);
+  }
+  if (!tv && L->base+1 < L->top && tviscdata(L->base+1)) {
+    CTypeID id = cdataV(L->base+1)->ctypeid;
+    CType *ct = ctype_raw(cts, id);
+    if (ctype_isptr(ct->info)) id = ctype_cid(ct->info);
+    tv = lj_ctype_meta(cts, id, mm);
+  }
+  if (!tv) {
+    const char *repr[2];
+    int i, isenum = -1, isstr = -1;  /* Operand index of an enum/string, or -1. */
+    if (mm == MM_eq) {  /* Equality checks never raise an error. */
+      int eq = ca->p[0] == ca->p[1];
+      setboolV(L->top-1, eq);
+      setboolV(&G(L)->tmptv2, eq);  /* Remember for trace recorder. */
+      return 1;
+    }
+    for (i = 0; i < 2; i++) {
+      if (ca->ct[i] && tviscdata(L->base+i)) {
+	if (ctype_isenum(ca->ct[i]->info)) isenum = i;
+	repr[i] = strdata(lj_ctype_repr(L, ctype_typeid(cts, ca->ct[i]), NULL));
+      } else {
+	if (tvisstr(&L->base[i])) isstr = i;
+	repr[i] = lj_typename(&L->base[i]);
+      }
+    }
+    if ((isenum ^ isstr) == 1)  /* True only for the index pairs 0/1 and 1/0. */
+      lj_err_callerv(L, LJ_ERR_FFI_BADCONV, repr[isstr], repr[isenum]);
+    lj_err_callerv(L, mm == MM_len ? LJ_ERR_FFI_BADLEN :
+		      mm == MM_concat ? LJ_ERR_FFI_BADCONCAT :
+		      mm < MM_add ? LJ_ERR_FFI_BADCOMP : LJ_ERR_FFI_BADARITH,
+		   repr[0], repr[1]);
+  }
+  return lj_meta_tailcall(L, tv);
+}
+
+/*
+** Arithmetic operators for cdata.
+** Tries 64 bit integer arithmetic first and pointer arithmetic second. If
+** one of them handles the operation, the result at L->top-1 is also copied
+** to G(L)->tmptv2 and 1 is returned. In all other cases the decision is
+** left to lj_carith_meta.
+*/
+int lj_carith_op(lua_State *L, MMS mm)
+{
+  CTState *cts = ctype_cts(L);
+  CDArith ca;
+  if (!carith_checkarg(L, cts, &ca))
+    return lj_carith_meta(L, cts, &ca, mm);
+  if (!carith_int64(L, cts, &ca, mm) && !carith_ptr(L, cts, &ca, mm))
+    return lj_carith_meta(L, cts, &ca, mm);
+  copyTV(L, &G(L)->tmptv2, L->top-1);  /* Remember for trace recorder. */
+  return 1;
+}
+
+/* -- 64 bit integer arithmetic helpers ----------------------------------- */
+
+#if LJ_32 && LJ_HASJIT
+/* 64 bit multiplication helper, used for signed and unsigned operands. */
+int64_t lj_carith_mul64(int64_t a, int64_t b)
+{
+  int64_t product = a * b;
+  return product;
+}
+#endif
+
+/*
+** Unsigned 64 bit division.
+** A zero divisor does not trap, but yields 0x8000000000000000.
+*/
+uint64_t lj_carith_divu64(uint64_t a, uint64_t b)
+{
+  return b == 0 ? U64x(80000000,00000000) : a / b;
+}
+
+/*
+** Signed 64 bit division.
+** Neither a zero divisor nor the overflowing case (most negative value
+** divided by -1) traps; both yield the bit pattern 0x8000000000000000.
+*/
+int64_t lj_carith_divi64(int64_t a, int64_t b)
+{
+  int overflow = (a == (int64_t)U64x(80000000,00000000) && b == -1);
+  if (b == 0 || overflow)
+    return U64x(80000000,00000000);
+  return a / b;
+}
+
+/*
+** Unsigned 64 bit modulo.
+** A zero divisor does not trap, but yields 0x8000000000000000.
+*/
+uint64_t lj_carith_modu64(uint64_t a, uint64_t b)
+{
+  return b == 0 ? U64x(80000000,00000000) : a % b;
+}
+
+/*
+** Signed 64 bit modulo.
+** A zero divisor yields the bit pattern 0x8000000000000000; the most
+** negative value modulo -1 yields 0 without performing the operation.
+*/
+int64_t lj_carith_modi64(int64_t a, int64_t b)
+{
+  if (b == 0)
+    return U64x(80000000,00000000);
+  return (a == (int64_t)U64x(80000000,00000000) && b == -1) ? 0 : a % b;
+}
+
+/*
+** Unsigned 64 bit x^k, computed by binary exponentiation.
+** All multiplications wrap around modulo 2^64. x^0 is 1 for every x.
+*/
+uint64_t lj_carith_powu64(uint64_t x, uint64_t k)
+{
+  uint64_t acc = 1;
+  while (k != 0) {
+    if ((k & 1) != 0)
+      acc *= x;  /* This bit of the exponent contributes x^(2^i). */
+    k >>= 1;
+    if (k != 0)
+      x *= x;
+  }
+  return acc;
+}
+
+/*
+** Signed 64 bit x^k.
+** A zero exponent yields 1. Positive exponents are delegated to the
+** unsigned helper. For negative exponents the result depends on the base:
+** 0 yields 0x7fffffffffffffff, 1 yields 1, -1 yields -1 or 1 depending on
+** the lowest bit of k, and every other base yields 0.
+*/
+int64_t lj_carith_powi64(int64_t x, int64_t k)
+{
+  if (k == 0)
+    return 1;
+  if (k > 0)
+    return (int64_t)lj_carith_powu64((uint64_t)x, (uint64_t)k);
+  switch (x) {
+  case 0: return U64x(7fffffff,ffffffff);
+  case 1: return 1;
+  case -1: return (k & 1) ? -1 : 1;
+  default: return 0;
+  }
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_carith.h
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_carith.h b/lib/luajit/src/lj_carith.h
new file mode 100644
index 0000000..8c4bdbb
--- /dev/null
+++ b/lib/luajit/src/lj_carith.h
@@ -0,0 +1,27 @@
+/*
+** C data arithmetic.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_CARITH_H
+#define _LJ_CARITH_H
+
+#include "lj_obj.h"
+
+#if LJ_HASFFI
+
+LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);  /* Operator mm on the operands at L->base. */
+
+#if LJ_32 && LJ_HASJIT
+LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);  /* x * k. */
+#endif
+LJ_FUNC uint64_t lj_carith_divu64(uint64_t a, uint64_t b);  /* a / b; 0x8000000000000000 if b == 0. */
+LJ_FUNC int64_t lj_carith_divi64(int64_t a, int64_t b);  /* a / b; same pattern if b == 0 or on overflow. */
+LJ_FUNC uint64_t lj_carith_modu64(uint64_t a, uint64_t b);  /* a % b; 0x8000000000000000 if b == 0. */
+LJ_FUNC int64_t lj_carith_modi64(int64_t a, int64_t b);  /* a % b; same pattern if b == 0. */
+LJ_FUNC uint64_t lj_carith_powu64(uint64_t x, uint64_t k);  /* x^k with wraparound; 1 if k == 0. */
+LJ_FUNC int64_t lj_carith_powi64(int64_t x, int64_t k);  /* x^k; negative k is special-cased. */
+
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_ccall.c
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_ccall.c b/lib/luajit/src/lj_ccall.c
new file mode 100644
index 0000000..998417c
--- /dev/null
+++ b/lib/luajit/src/lj_ccall.c
@@ -0,0 +1,900 @@
+/*
+** FFI C call handling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "lj_obj.h"
+
+#if LJ_HASFFI
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_ctype.h"
+#include "lj_cconv.h"
+#include "lj_cdata.h"
+#include "lj_ccall.h"
+#include "lj_trace.h"
+
+/* Target-specific handling of register arguments.
+**
+** Every target section below defines the same family of CCALL_HANDLE_*
+** macros. They are statement fragments, not expressions: they read and
+** assign names such as cc, sz, dp, sp, n, isfp, ngpr, nfpr, nsp and maxgpr,
+** and some of them jump to the labels done or err_nyi or use continue.
+** They only make sense inside the functions that expand them, which are
+** not part of this section of the file.
+*/
+#if LJ_TARGET_X86
+/* -- x86 calling conventions --------------------------------------------- */
+
+#if LJ_ABI_WIN
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs bigger than 8 by reference (on stack only). */ \
+  cc->retref = (sz > 8); \
+  if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
+
+#else
+
+#if LJ_TARGET_OSX
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs of size 1, 2, 4 or 8 in registers. */ \
+  cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
+  if (cc->retref) { \
+    if (ngpr < maxgpr) \
+      cc->gpr[ngpr++] = (GPRArg)dp; \
+    else \
+      cc->stack[nsp++] = (GPRArg)dp; \
+  } else {  /* Struct with single FP field ends up in FPR. */ \
+    cc->resx87 = ccall_classify_struct(cts, ctr); \
+  }
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  if (cc->resx87) sp = (uint8_t *)&cc->fpr[0]; \
+  memcpy(dp, sp, ctr->size);
+
+#else
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = 1;  /* Return all structs by reference (in reg or on stack). */ \
+  if (ngpr < maxgpr) \
+    cc->gpr[ngpr++] = (GPRArg)dp; \
+  else \
+    cc->stack[nsp++] = (GPRArg)dp;
+
+#endif
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Return complex float in GPRs and complex double by reference. */ \
+  cc->retref = (sz > 8); \
+  if (cc->retref) { \
+    if (ngpr < maxgpr) \
+      cc->gpr[ngpr++] = (GPRArg)dp; \
+    else \
+      cc->stack[nsp++] = (GPRArg)dp; \
+  }
+
+#endif
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (!cc->retref) \
+    *(int64_t *)dp = *(int64_t *)sp;  /* Copy complex float from GPRs. */
+
+#define CCALL_HANDLE_STRUCTARG \
+  ngpr = maxgpr;  /* Pass all structs by value on the stack. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  isfp = 1;  /* Pass complex by value on stack. */
+
+#define CCALL_HANDLE_REGARG \
+  if (!isfp) {  /* Only non-FP values may be passed in registers. */ \
+    if (n > 1) {  /* Anything > 32 bit is passed on the stack. */ \
+      if (!LJ_ABI_WIN) ngpr = maxgpr;  /* Prevent reordering. */ \
+    } else if (ngpr + 1 <= maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
+#elif LJ_TARGET_X64 && LJ_ABI_WIN
+/* -- Windows/x64 calling conventions ------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \
+  cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (!cc->retref) \
+    *(int64_t *)dp = *(int64_t *)sp;  /* Copy complex float from GPRs. */
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \
+  if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR;  /* Pass all other structs by reference. */ \
+  }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex float in a GPR and complex double by reference. */ \
+  if (sz != 2*sizeof(float)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  }
+
+/* Windows/x64 argument registers are strictly positional (use ngpr).
+** An FP argument therefore takes the FPR with the same index as the next
+** GPR slot, and nfpr is set to the shared counter afterwards.
+*/
+#define CCALL_HANDLE_REGARG \
+  if (isfp) { \
+    if (ngpr < maxgpr) { dp = &cc->fpr[ngpr++]; nfpr = ngpr; goto done; } \
+  } else { \
+    if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
+  }
+
+#elif LJ_TARGET_X64  /* Only reached without LJ_ABI_WIN (see the branch above). */
+/* -- POSIX/x64 calling conventions --------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  int rcl[2]; rcl[0] = rcl[1] = 0; \
+  if (ccall_classify_struct(cts, ctr, rcl, 0)) { \
+    cc->retref = 1;  /* Return struct by reference. */ \
+    cc->gpr[ngpr++] = (GPRArg)dp; \
+  } else { \
+    cc->retref = 0;  /* Return small structs in registers. */ \
+  }
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  int rcl[2]; rcl[0] = rcl[1] = 0; \
+  ccall_classify_struct(cts, ctr, rcl, 0); \
+  ccall_struct_ret(cc, rcl, dp, ctr->size);
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in one or two FPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPR. */ \
+    *(int64_t *)dp = cc->fpr[0].l[0]; \
+  } else {  /* Copy non-contiguous complex double from FPRs. */ \
+    ((int64_t *)dp)[0] = cc->fpr[0].l[0]; \
+    ((int64_t *)dp)[1] = cc->fpr[1].l[0]; \
+  }
+
+#define CCALL_HANDLE_STRUCTARG \
+  int rcl[2]; rcl[0] = rcl[1] = 0; \
+  if (!ccall_classify_struct(cts, d, rcl, 0)) { \
+    cc->nsp = nsp; cc->ngpr = ngpr; cc->nfpr = nfpr; \
+    if (ccall_struct_arg(cc, cts, d, rcl, o, narg)) goto err_nyi; \
+    nsp = cc->nsp; ngpr = cc->ngpr; nfpr = cc->nfpr; \
+    continue; \
+  }  /* Pass all other structs by value on stack. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  isfp = 2;  /* Pass complex in FPRs or on stack. Needs postprocessing. */
+
+#define CCALL_HANDLE_REGARG \
+  if (isfp) {  /* Try to pass argument in FPRs. */ \
+    int n2 = ctype_isvector(d->info) ? 1 : n;  /* A vector takes a single FPR. */ \
+    if (nfpr + n2 <= CCALL_NARG_FPR) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += n2; \
+      goto done; \
+    } \
+  } else {  /* Try to pass argument in GPRs. */ \
+    /* Note that reordering is explicitly allowed in the x64 ABI. */ \
+    if (n <= 2 && ngpr + n <= maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
+#elif LJ_TARGET_ARM
+/* -- ARM calling conventions --------------------------------------------- */
+
+#if LJ_ABI_SOFTFP
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs of size <= 4 in a GPR. */ \
+  cc->retref = !(sz <= 4); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET \
+  cc->retref = 1;  /* Return all complex values by reference. */ \
+  cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  UNUSED(dp); /* Nothing to do. */
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass all structs by value in registers and/or on the stack. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex by value in 2 or 4 GPRs. */
+
+#define CCALL_HANDLE_REGARG_FP1
+#define CCALL_HANDLE_REGARG_FP2
+
+#else
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = !ccall_classify_struct(cts, ctr, ct); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  if (ccall_classify_struct(cts, ctr, ct) > 1) sp = (uint8_t *)&cc->fpr[0]; \
+  memcpy(dp, sp, ctr->size);
+
+#define CCALL_HANDLE_COMPLEXRET \
+  if (!(ct->info & CTF_VARARG)) cc->retref = 0;  /* Return complex in FPRs (non-vararg only). */
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (!(ct->info & CTF_VARARG)) memcpy(dp, &cc->fpr[0], ctr->size);
+
+#define CCALL_HANDLE_STRUCTARG \
+  isfp = (ccall_classify_struct(cts, d, ct) > 1);
+  /* Pass all structs by value in registers and/or on the stack. isfp marks class > 1. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  isfp = 1;  /* Pass complex by value in FPRs or on stack. */
+
+#define CCALL_HANDLE_REGARG_FP1 \
+  if (isfp && !(ct->info & CTF_VARARG)) { \
+    if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \
+      if (nfpr + (n >> 1) <= CCALL_NARG_FPR) { \
+	dp = &cc->fpr[nfpr]; \
+	nfpr += (n >> 1); \
+	goto done; \
+      } \
+    } else { \
+      if (sz > 1 && fprodd != nfpr) fprodd = 0; \
+      if (fprodd) { \
+	if (2*nfpr+n <= 2*CCALL_NARG_FPR+1) { \
+	  dp = (void *)&cc->fpr[fprodd-1].f[1]; \
+	  nfpr += (n >> 1); \
+	  if ((n & 1)) fprodd = 0; else fprodd = nfpr-1; \
+	  goto done; \
+	} \
+      } else { \
+	if (2*nfpr+n <= 2*CCALL_NARG_FPR) { \
+	  dp = (void *)&cc->fpr[nfpr]; \
+	  nfpr += (n >> 1); \
+	  if ((n & 1)) fprodd = ++nfpr; else fprodd = 0; \
+	  goto done; \
+	} \
+      } \
+    } \
+    fprodd = 0;  /* No reordering after the first FP value is on stack: falls through to stack passing. */ \
+  } else {
+
+#define CCALL_HANDLE_REGARG_FP2	}
+
+#endif
+
+#define CCALL_HANDLE_REGARG \
+  CCALL_HANDLE_REGARG_FP1 \
+  if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \
+    if (ngpr < maxgpr) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair: round ngpr up to an even index. */ \
+  } \
+  if (ngpr < maxgpr) { \
+    dp = &cc->gpr[ngpr]; \
+    if (ngpr + n > maxgpr) { \
+      nsp += ngpr + n - maxgpr;  /* Arg spills over into the stack. Assumes contiguous gpr/stack fields. */ \
+      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+      ngpr = maxgpr; \
+    } else { \
+      ngpr += n; \
+    } \
+    goto done; \
+  } CCALL_HANDLE_REGARG_FP2
+
+#define CCALL_HANDLE_RET \
+  if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
+
+#elif LJ_TARGET_PPC
+/* -- PPC calling conventions --------------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = 1;  /* Return all structs by reference: dp goes in the next GPR. */ \
+  cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in 2 or 4 GPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  memcpy(dp, sp, ctr->size);  /* Copy complex from GPRs (sp points to the result registers). */
+
+#define CCALL_HANDLE_STRUCTARG \
+  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+  sz = CTSIZE_PTR;  /* Pass all structs by reference: rp is a fresh copy, only its address is passed. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex by value in 2 or 4 GPRs. */
+
+#define CCALL_HANDLE_REGARG \
+  if (isfp) {  /* Try to pass argument in FPRs. */ \
+    if (nfpr + 1 <= CCALL_NARG_FPR) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += 1; \
+      d = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
+      goto done; \
+    } \
+  } else {  /* Try to pass argument in GPRs. */ \
+    if (n > 1) { \
+      lua_assert(n == 2 || n == 4);  /* int64_t or complex (float). */ \
+      if (ctype_isinteger(d->info)) \
+	ngpr = (ngpr + 1u) & ~1u;  /* Align int64_t to regpair. */ \
+      else if (ngpr + n > maxgpr) \
+	ngpr = maxgpr;  /* Prevent reordering: later args must not use the remaining GPRs. */ \
+    } \
+    if (ngpr + n <= maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    ctr = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */
+
+#elif LJ_TARGET_PPCSPE
+/* -- PPC/SPE calling conventions ----------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = 1;  /* Return all structs by reference: dp goes in the next GPR. */ \
+  cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in 2 or 4 GPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  memcpy(dp, sp, ctr->size);  /* Copy complex from GPRs (sp points to the result registers). */
+
+#define CCALL_HANDLE_STRUCTARG \
+  rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+  sz = CTSIZE_PTR;  /* Pass all structs by reference: rp is a fresh copy, only its address is passed. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex by value in 2 or 4 GPRs. */
+
+/* PPC/SPE has a softfp ABI, so every argument is tried in GPRs only. */
+#define CCALL_HANDLE_REGARG \
+  if (n > 1) {  /* Doesn't fit in a single GPR? */ \
+    lua_assert(n == 2 || n == 4);  /* int64_t, double or complex (float). */ \
+    if (n == 2) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Only align 64 bit value to regpair. */ \
+    else if (ngpr + n > maxgpr) \
+      ngpr = maxgpr;  /* Prevent reordering: later args must not use the remaining GPRs. */ \
+  } \
+  if (ngpr + n <= maxgpr) { \
+    dp = &cc->gpr[ngpr]; \
+    ngpr += n; \
+    goto done; \
+  }
+
+#elif LJ_TARGET_MIPS
+/* -- MIPS calling conventions -------------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+  cc->retref = 1;  /* Return all structs by reference: dp goes in the next GPR. */ \
+  cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in 1 or 2 FPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass all structs by value in registers and/or on the stack. */
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass complex by value in 2 or 4 GPRs. */
+
+#define CCALL_HANDLE_REGARG \
+  if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
+    /* Try to pass argument in FPRs. The matching GPRs are skipped, too. */ \
+    dp = n == 1 ? (void *)&cc->fpr[nfpr].f : (void *)&cc->fpr[nfpr].d; \
+    nfpr++; ngpr += n; \
+    goto done; \
+  } else {  /* Try to pass argument in GPRs. No FPRs are used from here on. */ \
+    nfpr = CCALL_NARG_FPR; \
+    if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
+      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
+    if (ngpr < maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      if (ngpr + n > maxgpr) { \
+	nsp += ngpr + n - maxgpr;  /* Arg spills over into the stack. Assumes contiguous gpr/stack fields. */ \
+	if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+	ngpr = maxgpr; \
+      } else { \
+	ngpr += n; \
+      } \
+      goto done; \
+    } \
+  }
+
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    sp = (uint8_t *)&cc->fpr[0].f;
+
+#else
+#error "Missing calling convention definitions for this architecture"
+#endif
+
+#ifndef CCALL_HANDLE_STRUCTRET2
+#define CCALL_HANDLE_STRUCTRET2 \
+  memcpy(dp, sp, ctr->size);  /* Default: copy struct return value from GPRs (sp) to the result object (dp). */
+#endif
+
+/* -- x86 OSX ABI struct classification ----------------------------------- */
+
+#if LJ_TARGET_X86 && LJ_TARGET_OSX
+
+/*
+** Check for struct with single FP field.
+** Only structs (not unions) whose size equals sizeof(float) or
+** sizeof(double) qualify. Returns 1 for float, 2 for double, 0 otherwise.
+*/
+static int ccall_classify_struct(CTState *cts, CType *ct)
+{
+  const CTSize total = ct->size;
+  CType *member = ct;
+  if (total != sizeof(float) && total != sizeof(double)) return 0;
+  if ((ct->info & CTF_UNION) != 0) return 0;
+  while (member->sib) {
+    CType *inner;
+    member = ctype_get(cts, member->sib);
+    if (ctype_isfield(member->info)) {
+      inner = ctype_rawchild(cts, member);
+      if (ctype_isfp(inner->info)) {
+	if (inner->size != total) continue;  /* Keep scanning the siblings. */
+	return (total >> 2);  /* Return 1 for float or 2 for double. */
+      }
+      if (!ctype_isstruct(inner->info)) return 0;  /* Any other field type. */
+    } else if (ctype_isbitfield(member->info)) {
+      return 0;
+    } else if (ctype_isxattrib(member->info, CTA_SUBTYPE)) {
+      inner = ctype_rawchild(cts, member);
+    } else {
+      continue;  /* Neither field, bitfield nor subtype: skip it. */
+    }
+    /* Nested struct or subtype: the first one with a size decides. */
+    if (inner->size) return ccall_classify_struct(cts, inner);
+  }
+  return 0;
+}
+
+#endif
+
+/* -- x64 struct classification ------------------------------------------- */
+
+#if LJ_TARGET_X64 && !LJ_ABI_WIN
+
+/* Register classes for x64 struct classification.
+** These are bit flags, OR-ed together per 8 byte half of a struct:
+** rcl[0] collects the classes for offsets 0..7, rcl[1] for offsets >= 8.
+*/
+#define CCALL_RCL_INT	1
+#define CCALL_RCL_SSE	2
+#define CCALL_RCL_MEM	4
+/* NYI: classify vectors. */
+
+static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs);
+
+/*
+** Classify a C type located at byte offset ofs inside the outermost struct.
+** Arrays are classified element by element, structs are handed on to
+** ccall_classify_struct(). Anything else adds its class to rcl[0] or rcl[1].
+*/
+static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
+{
+  if (ctype_isarray(ct->info)) {
+    CType *elem = ctype_rawchild(cts, ct);
+    const CTSize step = elem->size, total = ct->size;
+    CTSize pos = 0;
+    while (pos < total) {
+      ccall_classify_ct(cts, elem, rcl, ofs+pos);
+      pos += step;
+    }
+    return;
+  }
+  if (ctype_isstruct(ct->info)) {
+    ccall_classify_struct(cts, ct, rcl, ofs);
+    return;
+  }
+  {
+    int cl;
+    lua_assert(ctype_hassize(ct->info));
+    if ((ofs & (ct->size-1)) != 0)
+      cl = CCALL_RCL_MEM;  /* Unaligned. */
+    else if (ctype_isfp(ct->info))
+      cl = CCALL_RCL_SSE;
+    else
+      cl = CCALL_RCL_INT;
+    rcl[ofs >= 8 ? 1 : 0] |= cl;
+  }
+}
+
+/*
+** Recursively classify a struct based on its fields.
+** Accumulates the register classes in rcl[0] and rcl[1]. Returns non-zero
+** if the struct needs the memory class, i.e. it's bigger than 16 bytes or
+** any part of it was classified as CCALL_RCL_MEM.
+*/
+static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
+{
+  CType *member = ct;
+  if (ct->size > 16) return CCALL_RCL_MEM;  /* Too big, gets memory class. */
+  for (; member->sib; ) {
+    CTSize fofs;
+    member = ctype_get(cts, member->sib);
+    fofs = ofs + member->size;  /* The size of a member holds its offset. */
+    if (ctype_isfield(member->info)) {
+      ccall_classify_ct(cts, ctype_rawchild(cts, member), rcl, fofs);
+      continue;
+    }
+    if (ctype_isbitfield(member->info)) {
+      rcl[fofs >= 8 ? 1 : 0] |= CCALL_RCL_INT;  /* NYI: unaligned bitfields? */
+      continue;
+    }
+    if (ctype_isxattrib(member->info, CTA_SUBTYPE))
+      ccall_classify_struct(cts, ctype_rawchild(cts, member), rcl, fofs);
+  }
+  return ((rcl[0]|rcl[1]) & CCALL_RCL_MEM);  /* Memory class? */
+}
+
+/*
+** Try to split up a small struct into registers.
+** dp holds the two 8 byte halves of the struct, rcl their classes.
+** Returns 1 on register overflow (cc->ngpr/cc->nfpr are left untouched),
+** or 0 after committing the new register counts.
+*/
+static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl)
+{
+  MSize gi = cc->ngpr, fi = cc->nfpr;
+  uint32_t half = 0;
+  do {
+    lua_assert(!(rcl[half] & CCALL_RCL_MEM));
+    if ((rcl[half] & CCALL_RCL_INT) != 0) {
+      /* Integer class takes precedence. */
+      if (gi >= CCALL_NARG_GPR) return 1;  /* Register overflow. */
+      cc->gpr[gi] = dp[half];
+      gi++;
+    } else if ((rcl[half] & CCALL_RCL_SSE) != 0) {
+      if (fi >= CCALL_NARG_FPR) return 1;  /* Register overflow. */
+      cc->fpr[fi].l[0] = dp[half];
+      fi++;
+    }
+  } while (++half < 2);
+  cc->ngpr = gi;
+  cc->nfpr = fi;
+  return 0;  /* Ok. */
+}
+
+/*
+** Pass a small struct argument.
+** The Lua value is converted into a zeroed 2 slot temp. struct, which is
+** passed in registers, if possible. Otherwise it's copied to the stack.
+** Returns 1 if there are too many stack arguments, 0 if ok.
+*/
+static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
+			    TValue *o, int narg)
+{
+  GPRArg tmp[2] = { 0, 0 };
+  MSize base, slots;
+  /* Convert to temp. struct. */
+  lj_cconv_ct_tv(cts, d, (uint8_t *)tmp, o, CCF_ARG(narg));
+  if (!ccall_struct_reg(cc, tmp, rcl))
+    return 0;  /* Ok, passed in registers. */
+  /* Register overflow. Pass on stack. */
+  base = cc->nsp;
+  slots = (rcl[1] != 0) ? 2 : 1;
+  if (base + slots > CCALL_MAXSTACK) return 1;  /* Too many arguments. */
+  memcpy(&cc->stack[base], tmp, slots*CTSIZE_PTR);
+  cc->nsp = base + slots;
+  return 0;  /* Ok. */
+}
+
+/*
+** Combine returned small struct.
+** Gathers the two 8 byte halves of the result from cc->gpr[] and/or
+** cc->fpr[], as selected by the classes in rcl, and copies the first
+** sz bytes to dp.
+*/
+static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz)
+{
+  GPRArg sp[2];
+  MSize ngpr = 0, nfpr = 0;
+  uint32_t i;
+  /* A half without a class (e.g. only padding) is not loaded below.
+  ** Zero the temp. struct, so no uninitialized stack bytes are copied.
+  */
+  sp[0] = sp[1] = 0;
+  for (i = 0; i < 2; i++) {
+    if ((rcl[i] & CCALL_RCL_INT)) {  /* Integer class takes precedence. */
+      sp[i] = cc->gpr[ngpr++];
+    } else if ((rcl[i] & CCALL_RCL_SSE)) {
+      sp[i] = cc->fpr[nfpr++].l[0];
+    }
+  }
+  memcpy(dp, sp, sz);
+}
+#endif
+
+/* -- ARM hard-float ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+
+/* Classify a struct based on its fields.
+**
+** Returns r + (n << 8) for an aggregate made up only of FP and/or complex
+** fields (possibly inside nested structs), where r is the common element
+** size (4 or 8) and n <= 4 is the element count (the maximum for unions).
+** Otherwise, and always if the function type ctf is vararg, returns 1 if
+** the struct size is <= 4 and 0 if not.
+*/
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
+{
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
+  if ((ctf->info & CTF_VARARG)) goto noth;
+  while (ct->sib) {
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+	r |= sct->size;
+	if (!isu) n++; else if (n == 0) n = 1;
+      } else if (ctype_iscomplex(sct->info)) {
+	r |= (sct->size >> 1);
+	if (!isu) n += 2; else if (n < 2) n = 2;
+      } else if (ctype_isstruct(sct->info)) {
+	goto substruct;
+      } else {
+	goto noth;
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:
+      if (sct->size > 0) {
+	unsigned int s = ccall_classify_struct(cts, sct, ctf);
+	if (s <= 1) goto noth;
+	r |= (s & 255);
+	if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)
+    return r + (n << 8);
+noth:  /* Not a homogeneous float/double aggregate (or mixed element sizes). */
+  return (sz <= 4);  /* Return structs of size <= 4 in a GPR. */
+}
+
+#endif
+
+/* -- Common C call handling ---------------------------------------------- */
+
+/*
+** Infer the destination CTypeID for a vararg argument.
+** Numbers and float cdata are passed as doubles, strings as const char
+** pointers and booleans as bool. Array, struct and function cdata are
+** passed as pointers, any other cdata keeps its own type. All remaining
+** values are passed as void pointers.
+*/
+CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o)
+{
+  if (tvisnumber(o))
+    return CTID_DOUBLE;
+  if (tviscdata(o)) {
+    CTypeID id = cdataV(o)->ctypeid;
+    CType *s = ctype_get(cts, id);
+    if (ctype_isrefarray(s->info)) {
+      /* Arrays decay to a pointer to their element type. */
+      return lj_ctype_intern(cts,
+	       CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(s->info)), CTSIZE_PTR);
+    }
+    if (ctype_isstruct(s->info) || ctype_isfunc(s->info)) {
+      /* NYI: how to pass a struct by value in a vararg argument? */
+      return lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR);
+    }
+    if (ctype_isfp(s->info) && s->size == sizeof(float))
+      return CTID_DOUBLE;
+    return id;
+  }
+  if (tvisstr(o))
+    return CTID_P_CCHAR;
+  if (tvisbool(o))
+    return CTID_BOOL;
+  return CTID_P_VOID;
+}
+
+/* Setup arguments for C call.
+**
+** Converts the Lua arguments in L->base+1 .. L->top-1 to their declared
+** (or inferred, for varargs) C types and stores them in cc->gpr, cc->fpr
+** (if present) or cc->stack. Also sets cc->nsp and cc->spadj. For struct
+** or complex results, a cdata object is preallocated and pushed onto the
+** Lua stack after the arguments.
+**
+** Throws LJ_ERR_FFI_NUMARG for too many or too few arguments and
+** LJ_ERR_FFI_NYICALL for unsupported types or too many stack slots.
+** Returns the number of arguments passed by reference, which the caller
+** uses as a count of GC steps.
+*/
+static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
+			  CCallState *cc)
+{
+  int gcsteps = 0;
+  TValue *o, *top = L->top;
+  CTypeID fid;
+  CType *ctr;
+  MSize maxgpr, ngpr = 0, nsp = 0, narg;
+#if CCALL_NARG_FPR
+  MSize nfpr = 0;
+#if LJ_TARGET_ARM
+  MSize fprodd = 0;
+#endif
+#endif
+
+  /* Clear unused regs to get some determinism in case of misdeclaration. */
+  memset(cc->gpr, 0, sizeof(cc->gpr));
+#if CCALL_NUM_FPR
+  memset(cc->fpr, 0, sizeof(cc->fpr));
+#endif
+
+#if LJ_TARGET_X86
+  /* x86 has several different calling conventions. */
+  cc->resx87 = 0;
+  switch (ctype_cconv(ct->info)) {
+  case CTCC_FASTCALL: maxgpr = 2; break;
+  case CTCC_THISCALL: maxgpr = 1; break;
+  default: maxgpr = 0; break;
+  }
+#else
+  maxgpr = CCALL_NARG_GPR;
+#endif
+
+  /* Perform required setup for some result types. */
+  ctr = ctype_rawchild(cts, ct);
+  if (ctype_isvector(ctr->info)) {
+    if (!(CCALL_VECTOR_REG && (ctr->size == 8 || ctr->size == 16)))
+      goto err_nyi;
+  } else if (ctype_iscomplex(ctr->info) || ctype_isstruct(ctr->info)) {
+    /* Preallocate cdata object and anchor it after arguments. */
+    CTSize sz = ctr->size;
+    GCcdata *cd = lj_cdata_new(cts, ctype_cid(ct->info), sz);
+    void *dp = cdataptr(cd);
+    setcdataV(L, L->top++, cd);
+    if (ctype_isstruct(ctr->info)) {
+      CCALL_HANDLE_STRUCTRET
+    } else {
+      CCALL_HANDLE_COMPLEXRET
+    }
+#if LJ_TARGET_X86
+  } else if (ctype_isfp(ctr->info)) {
+    cc->resx87 = ctr->size == sizeof(float) ? 1 : 2;
+#endif
+  }
+
+  /* Skip initial attributes. */
+  fid = ct->sib;
+  while (fid) {
+    CType *ctf = ctype_get(cts, fid);
+    if (!ctype_isattrib(ctf->info)) break;
+    fid = ctf->sib;
+  }
+
+  /* Walk through all passed arguments. The preallocated result is excluded. */
+  for (o = L->base+1, narg = 1; o < top; o++, narg++) {
+    CTypeID did;
+    CType *d;
+    CTSize sz;
+    MSize n, isfp = 0, isva = 0;
+    void *dp, *rp = NULL;
+
+    if (fid) {  /* Get argument type from field. */
+      CType *ctf = ctype_get(cts, fid);
+      fid = ctf->sib;
+      lua_assert(ctype_isfield(ctf->info));
+      did = ctype_cid(ctf->info);
+    } else {
+      if (!(ct->info & CTF_VARARG))
+	lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too many arguments. */
+      did = lj_ccall_ctid_vararg(cts, o);  /* Infer vararg type. */
+      isva = 1;
+    }
+    d = ctype_raw(cts, did);
+    sz = d->size;
+
+    /* Find out how (by value/ref) and where (GPR/FPR) to pass an argument. */
+    if (ctype_isnum(d->info)) {
+      if (sz > 8) goto err_nyi;
+      if ((d->info & CTF_FP))
+	isfp = 1;
+    } else if (ctype_isvector(d->info)) {
+      if (CCALL_VECTOR_REG && (sz == 8 || sz == 16))
+	isfp = 1;
+      else
+	goto err_nyi;
+    } else if (ctype_isstruct(d->info)) {
+      CCALL_HANDLE_STRUCTARG
+    } else if (ctype_iscomplex(d->info)) {
+      CCALL_HANDLE_COMPLEXARG
+    } else {
+      sz = CTSIZE_PTR;
+    }
+    sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
+    n = sz / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
+
+    CCALL_HANDLE_REGARG  /* Handle register arguments. Jumps to done on success. */
+
+    /* Otherwise pass argument on stack. */
+    if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
+      MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
+      nsp = (nsp + align) & ~align;  /* Align argument on stack. */
+    }
+    if (nsp + n > CCALL_MAXSTACK) {  /* Too many arguments. */
+    err_nyi:
+      lj_err_caller(L, LJ_ERR_FFI_NYICALL);
+    }
+    dp = &cc->stack[nsp];
+    nsp += n;
+    isva = 0;
+
+  done:
+    if (rp) {  /* Pass by reference: store the pointer, convert into the copy. */
+      gcsteps++;
+      *(void **)dp = rp;
+      dp = rp;
+    }
+    lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
+    /* Extend passed integers to 32 bits at least. */
+    if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
+      if (d->info & CTF_UNSIGNED)
+	*(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp :
+					 (uint32_t)*(uint16_t *)dp;
+      else
+	*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
+					(int32_t)*(int16_t *)dp;
+    }
+#if LJ_TARGET_X64 && LJ_ABI_WIN
+    if (isva) {  /* Windows/x64 mirrors varargs in both register sets. */
+      if (nfpr == ngpr)
+	cc->gpr[ngpr-1] = cc->fpr[ngpr-1].l[0];
+      else
+	cc->fpr[ngpr-1].l[0] = cc->gpr[ngpr-1];
+    }
+#else
+    UNUSED(isva);
+#endif
+#if LJ_TARGET_X64 && !LJ_ABI_WIN
+    if (isfp == 2 && n == 2 && (uint8_t *)dp == (uint8_t *)&cc->fpr[nfpr-2]) {
+      cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1];  /* Split complex double. */
+      cc->fpr[nfpr-2].d[1] = 0;
+    }
+#else
+    UNUSED(isfp);
+#endif
+  }
+  if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too few arguments. */
+
+#if LJ_TARGET_X64 || LJ_TARGET_PPC
+  cc->nfpr = nfpr;  /* Required for vararg functions. */
+#endif
+  cc->nsp = nsp;
+  cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
+  if (nsp > CCALL_SPS_FREE)
+    cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
+  return gcsteps;
+}
+
+/* Get results from C call.
+**
+** Stores the number of Lua results (0 or 1) in *ret. Struct and complex
+** results end up in the preallocated cdata object at L->top-1. All other
+** results are converted from the result registers to a TValue at L->top-1.
+** Returns the number of additional GC steps for the caller to perform.
+*/
+static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
+			     CCallState *cc, int *ret)
+{
+  CType *ctr = ctype_rawchild(cts, ct);
+  uint8_t *sp = (uint8_t *)&cc->gpr[0];
+  if (ctype_isvoid(ctr->info)) {
+    *ret = 0;  /* Zero results. */
+    return 0;  /* No additional GC step. */
+  }
+  *ret = 1;  /* One result. */
+  if (ctype_isstruct(ctr->info)) {
+    /* Return cdata object which is already on top of stack. */
+    if (!cc->retref) {  /* Returned in registers: copy it to the object. */
+      void *dp = cdataptr(cdataV(L->top-1));  /* Use preallocated object. */
+      CCALL_HANDLE_STRUCTRET2
+    }
+    return 1;  /* One GC step. */
+  }
+  if (ctype_iscomplex(ctr->info)) {
+    /* Return cdata object which is already on top of stack. */
+    void *dp = cdataptr(cdataV(L->top-1));  /* Use preallocated object. */
+    CCALL_HANDLE_COMPLEXRET2
+    return 1;  /* One GC step. */
+  }
+  if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR)
+    sp += (CTSIZE_PTR - ctr->size);
+#if CCALL_NUM_FPR
+  if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))
+    sp = (uint8_t *)&cc->fpr[0];
+#endif
+#ifdef CCALL_HANDLE_RET
+  CCALL_HANDLE_RET
+#endif
+  /* No reference types end up here, so there's no need for the CTypeID. */
+  lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)));
+  return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp);
+}
+
+/*
+** Call C function.
+** cd must hold a function or a pointer to a function. The arguments are
+** taken from the Lua stack and the result (if any) ends up at L->top-1.
+** Returns the number of results or -1 if cd is not a function.
+*/
+int lj_ccall_func(lua_State *L, GCcdata *cd)
+{
+  CTState *cts = ctype_cts(L);
+  CType *ct = ctype_raw(cts, cd->ctypeid);
+  CTSize sz = CTSIZE_PTR;
+  CCallState cc;
+  intptr_t ctofs;
+  int gcsteps, ret;
+  if (ctype_isptr(ct->info)) {  /* Pointer: use its size and its child. */
+    sz = ct->size;
+    ct = ctype_rawchild(cts, ct);
+  }
+  if (!ctype_isfunc(ct->info))
+    return -1;  /* Not a function. */
+  cc.func = (void (*)(void))cdata_getptr(cdataptr(cd), sz);
+  gcsteps = ccall_set_args(L, cts, ct, &cc);
+  /* Keep ct as an offset across the call. cts->tab may be reallocated. */
+  ctofs = (intptr_t)ct - (intptr_t)cts->tab;
+  cts->cb.slot = ~0u;
+  lj_vm_ffi_call(&cc);
+  if (cts->cb.slot != ~0u) {  /* Blacklist function that called a callback. */
+    TValue tv;
+    setlightudV(&tv, (void *)cc.func);
+    setboolV(lj_tab_set(L, cts->miscmap, &tv), 1);
+  }
+  ct = (CType *)(ctofs + (intptr_t)cts->tab);
+  gcsteps += ccall_get_results(L, cts, ct, &cc, &ret);
+#if LJ_TARGET_X86 && LJ_ABI_WIN
+  /* Automatically detect __stdcall and fix up C function declaration. */
+  if (cc.spadj && ctype_cconv(ct->info) == CTCC_CDECL) {
+    CTF_INSERT(ct->info, CCONV, CTCC_STDCALL);
+    lj_trace_abort(G(L));
+  }
+#endif
+  for (; gcsteps > 0; gcsteps--)
+    lj_gc_check(L);
+  return ret;
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/1f27b840/lib/luajit/src/lj_ccall.h
----------------------------------------------------------------------
diff --git a/lib/luajit/src/lj_ccall.h b/lib/luajit/src/lj_ccall.h
new file mode 100644
index 0000000..f553010
--- /dev/null
+++ b/lib/luajit/src/lj_ccall.h
@@ -0,0 +1,171 @@
+/*
+** FFI C call handling.
+** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_CCALL_H
+#define _LJ_CCALL_H
+
+#include "lj_obj.h"
+#include "lj_ctype.h"
+
+#if LJ_HASFFI
+
+/* -- C calling conventions ----------------------------------------------- */
+
+#if LJ_TARGET_X86ORX64
+
+#if LJ_TARGET_X86
+#define CCALL_NARG_GPR		2	/* For fastcall arguments. */
+#define CCALL_NARG_FPR		0	/* No arguments are passed in FPRs. */
+#define CCALL_NRET_GPR		2
+#define CCALL_NRET_FPR		1	/* For FP results on x87 stack. */
+#define CCALL_ALIGN_STACKARG	0	/* Don't align argument on stack. */
+#elif LJ_ABI_WIN
+#define CCALL_NARG_GPR		4
+#define CCALL_NARG_FPR		4
+#define CCALL_NRET_GPR		1
+#define CCALL_NRET_FPR		1
+#define CCALL_SPS_EXTRA		4
+#else
+#define CCALL_NARG_GPR		6
+#define CCALL_NARG_FPR		8
+#define CCALL_NRET_GPR		2
+#define CCALL_NRET_FPR		2
+#define CCALL_VECTOR_REG	1	/* Pass vectors in registers. */
+#endif
+
+#define CCALL_SPS_FREE		1
+#define CCALL_ALIGN_CALLSTATE	16
+
+typedef LJ_ALIGN(16) union FPRArg {
+  double d[2];
+  float f[4];
+  uint8_t b[16];
+  uint16_t s[8];
+  int i[4];
+  int64_t l[2];
+} FPRArg;
+
+typedef intptr_t GPRArg;
+
+#elif LJ_TARGET_ARM
+
+#define CCALL_NARG_GPR		4
+#define CCALL_NRET_GPR		2	/* For softfp double. */
+#if LJ_ABI_SOFTFP
+#define CCALL_NARG_FPR		0	/* Soft-float ABI: no FPRs are used. */
+#define CCALL_NRET_FPR		0
+#else
+#define CCALL_NARG_FPR		8
+#define CCALL_NRET_FPR		4
+#endif
+#define CCALL_SPS_FREE		0
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+  double d;
+  float f[2];
+} FPRArg;
+
+#elif LJ_TARGET_PPC
+
+#define CCALL_NARG_GPR		8
+#define CCALL_NARG_FPR		8
+#define CCALL_NRET_GPR		4	/* For complex double. */
+#define CCALL_NRET_FPR		1
+#define CCALL_SPS_EXTRA		4
+#define CCALL_SPS_FREE		0
+
+typedef intptr_t GPRArg;
+typedef double FPRArg;
+
+#elif LJ_TARGET_PPCSPE
+
+#define CCALL_NARG_GPR		8
+#define CCALL_NARG_FPR		0	/* No FPRs are used. */
+#define CCALL_NRET_GPR		4	/* For softfp complex double. */
+#define CCALL_NRET_FPR		0
+#define CCALL_SPS_FREE		0	/* NYI */
+
+typedef intptr_t GPRArg;
+
+#elif LJ_TARGET_MIPS
+
+#define CCALL_NARG_GPR		4
+#define CCALL_NARG_FPR		2
+#define CCALL_NRET_GPR		2
+#define CCALL_NRET_FPR		2
+#define CCALL_SPS_EXTRA		7
+#define CCALL_SPS_FREE		1
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+  double d;
+  struct { LJ_ENDIAN_LOHI(float f; , float g;) };  /* Two floats overlaying d; order is picked by LJ_ENDIAN_LOHI. */
+} FPRArg;
+
+#else
+#error "Missing calling convention definitions for this architecture"
+#endif
+
+#ifndef CCALL_SPS_EXTRA
+#define CCALL_SPS_EXTRA		0
+#endif
+#ifndef CCALL_VECTOR_REG
+#define CCALL_VECTOR_REG	0
+#endif
+#ifndef CCALL_ALIGN_STACKARG
+#define CCALL_ALIGN_STACKARG	1
+#endif
+#ifndef CCALL_ALIGN_CALLSTATE
+#define CCALL_ALIGN_CALLSTATE	8
+#endif
+
+#define CCALL_NUM_GPR \
+  (CCALL_NARG_GPR > CCALL_NRET_GPR ? CCALL_NARG_GPR : CCALL_NRET_GPR)
+#define CCALL_NUM_FPR \
+  (CCALL_NARG_FPR > CCALL_NRET_FPR ? CCALL_NARG_FPR : CCALL_NRET_FPR)
+
+/* Check against constants in lj_ctype.h. The register arrays must fit both arguments and results. */
+LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
+LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
+
+#define CCALL_MAXSTACK		32
+
+/* -- C call state -------------------------------------------------------- */
+
+typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
+  void (*func)(void);		/* Pointer to called function. */
+  uint32_t spadj;		/* Stack pointer adjustment (in bytes). */
+  uint8_t nsp;			/* Number of stack slots. */
+  uint8_t retref;		/* Return value by reference. */
+#if LJ_TARGET_X64
+  uint8_t ngpr;			/* Number of arguments in GPRs. */
+  uint8_t nfpr;			/* Number of arguments in FPRs. */
+#elif LJ_TARGET_X86
+  uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
+#elif LJ_TARGET_PPC
+  uint8_t nfpr;			/* Number of arguments in FPRs. */
+#endif
+#if LJ_32
+  int32_t align1;
+#endif
+#if CCALL_NUM_FPR
+  FPRArg fpr[CCALL_NUM_FPR];	/* Arguments/results in FPRs. */
+#endif
+  GPRArg gpr[CCALL_NUM_GPR];	/* Arguments/results in GPRs. */
+  GPRArg stack[CCALL_MAXSTACK];	/* Stack slots. Must directly follow gpr[]. */
+} CCallState;
+
+/* -- C call handling ----------------------------------------------------- */
+
+/* Really belongs to lj_vm.h. Performs the actual call described by cc. */
+LJ_ASMF void LJ_FASTCALL lj_vm_ffi_call(CCallState *cc);
+
+LJ_FUNC CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o);
+LJ_FUNC int lj_ccall_func(lua_State *L, GCcdata *cd);
+
+#endif
+
+#endif