Squashed 'src/deps/src/luajit/' changes from aa038d259..4182d6bf3

4182d6bf3 Merge branch 'v2.1' into v2.1-agentzh
43d0a1915 Fix last commit.
536cf8a27 Merge branch 'master' into v2.1
644723649 x86/x64: Don't fuse loads across IR_NEWREF.
113a168b7 Improve last commit.
45c88b796 x86/x64: Don't fuse loads across table.clear.
6807e60af Merge branch 'master' into v2.1
d854d00ce x86/x64: Add more red zone checks to assembler backend.
7c9671a04 Merge branch 'master' into v2.1
a4c164043 Add stack check to pcall/xpcall.
69bbbf773 Merge branch 'master' into v2.1
65c849390 Invalidate SCEV entry when returning to lower frame.
b94fbfbee Merge branch 'master' into v2.1
433d7e8d8 FFI: Fix pragma push stack limit check and throw on overflow.
ce2cd6173 ARM64: Fix disassembly of ldp/stp offsets.
07b3cd3cf Check for upvalue state transition in IR_UREFO.
0afa1676b Merge branch 'master' into v2.1
d133d67c8 x64: Properly fix __call metamethod return dispatch.
f2e955dae Windows/x86: _BitScan*64 are only available on 64 bit archs.
e826d0c10 Add 'cc' file type for saving bytecode.
4eb47df60 FFI/Windows: Fix type declaration for int64_t and uint64_t.
7269b0213 Merge branch 'master' into v2.1
db944b2b5 FFI: Fix dangling reference to CType in carith_checkarg().
656ecbcf8 DynASM/ARM64: Support ldp/stp of q registers.
d2a5487fd ARM64: Use ADR and ADRP to form constants.
14866a682 ARM64: Fix disassembly of U12 loads.
c5b075eb3 ARM64: Unify constant register handling in interpreter.
9cc8bbb7a ARM: Fix register hint for FFI calls with FP results.
1e93951b2 ARM64: Fix register hint for FFI calls with FP results.
007e4dce1 ARM64: Restore fp before sp in C stack unwinders.

git-subtree-dir: src/deps/src/luajit
git-subtree-split: 4182d6bf37e9f8d1cb5d6e83b1db66de84b95101
Author: Théophile Diot
Date:   2023-11-17 10:21:23 +00:00
parent f583d996a1
commit 6ed1ec58b1
19 changed files with 233 additions and 87 deletions

src/jit/dis_arm64.lua

@@ -985,8 +985,7 @@ local function disass_ins(ctx)
x = x.."]"
end
elseif p == "P" then
local opcv, sh = rshift(op, 26), 2
if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
local rn = map_regs.x[band(rshift(op, 5), 31)]
local ind = band(rshift(op, 23), 3)
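
The rewritten line computes the ldp/stp offset scale directly from the opcode bits instead of comparing opcode ranges (the old range test appears to mis-scale 64-bit GPR pairs such as 0xa9400000, taking sh=4 where 3 is correct). A minimal standalone C sketch of the same trick, under my reading of the A64 encoding (bits 31:30 = opc, bit 26 = V), not code from this repo:

#include <stdint.h>
#include <stdio.h>

/* Shifting by 31-V keeps one opc bit for GPR pairs (scale 2 or 3) and
** both opc bits for FP/SIMD pairs (scale 2, 3 or 4 for s/d/q regs). */
static unsigned ldpstp_scale(uint32_t op)
{
  unsigned v = (op >> 26) & 1;  /* V flag: 0 = GPR pair, 1 = FP/SIMD pair */
  return 2 + (op >> (31 - v));
}

int main(void)
{
  printf("%u\n", ldpstp_scale(0xa9400000u));  /* ldp x0, x0, [x0]: 3 */
  return 0;
}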

src/lj_asm_arm.h

@@ -969,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
emit_opk(as, ARMI_ADD, dest, uv,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_lso(as, ARMI_LDR, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
if (ir->o == IR_UREFC)
emit_opk(as, ARMI_ADD, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
}
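
The same reshaping repeats in the ARM64, MIPS, PPC and x86 backends below: a guarded flag derived from the IR type decides whether asm_uref must load the upvalue's closed byte and guard on the expected open/closed state, and the constant-address fast path stays legal only for unguarded refs. A compilable sketch of the predicate; the flag values are illustrative stand-ins, not LuaJIT's real IRT_* constants from lj_ir.h:

#include <stdint.h>
#include <stdio.h>

#define IRT_TYPE  0x1fu  /* illustrative: low bits hold the result type */
#define IRT_GUARD 0x80u  /* illustrative: instruction carries a guard */
#define IRT_PGC   0x09u  /* illustrative: pointer into GC'd memory */

/* A UREF needs the runtime closed-byte check exactly when it is a guarded
** reference to GC'd pointer data; only then may the upvalue's open/closed
** state have changed behind the trace's back. */
static int uref_guarded(uint32_t irt)
{
  return (irt & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
}

int main(void)
{
  printf("%d %d\n", uref_guarded(IRT_GUARD|IRT_PGC), uref_guarded(IRT_PGC));
  return 0;
}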

src/lj_asm_arm64.h

@@ -960,22 +960,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v);
} else {
if (ir->o == IR_UREFC) {
asm_guardcnb(as, A64I_CBZ, RID_TMP);
if (guarded)
asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
if (ir->o == IR_UREFC)
emit_opk(as, A64I_ADDx, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadu64(as, dest, k);
} else {
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
}

src/lj_asm_mips.h

@@ -1207,22 +1207,29 @@ nolo:
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
if (guarded)
asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
if (ir->o == IR_UREFC)
emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, dest, o);
} else {
emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}

src/lj_asm_ppc.h

@@ -840,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
}
emit_tai(as, PPCI_LWZ, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
if (ir->o == IR_UREFC)
emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
}

src/lj_asm_x86.h

@@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
/* Check if there's no conflicting instruction between curins and ref.
** Also avoid fusing loads if there are multiple references.
*/
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
{
IRIns *ir = as->ir;
IRRef i = as->curins;
@@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
while (--i > ref) {
if (ir[i].o == conflict)
return 0; /* Conflict found. */
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
return 0;
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0;
}
return 1; /* Ok, no conflict. */
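
The third parameter widens from the old boolean noload into a two-bit mask, which the later hunks thread through every fusion site. Summarizing the new contract as C comments, my paraphrase of the diff rather than upstream documentation:

/* Meaning of the new `check` mask for noconflict():
**   check & 1  also treat any intervening IR_NEWREF or IR_CALLS as a
**              conflict; either may resize or clear a table and move the
**              memory a fused load would read (see the IR_NEWREF and
**              table.clear commits in the log above).
**   check & 2  the old !noload behaviour: refuse to fuse when the load
**              result is referenced again before the current instruction.
** So asm_fuseabase now passes 0, most loads pass 2, and ALOAD/HLOAD pass
** 2+1 via noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)).
*/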
@@ -134,7 +136,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
return irb->op1; /* Table obj. */
}
@@ -456,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 0) &&
noconflict(as, ref, IR_RETF, 2) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@@ -467,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
} else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
noconflict(as, ref, IR_FSTORE, 0)) {
noconflict(as, ref, IR_FSTORE, 2)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
@@ -482,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
noconflict(as, ref, IR_XSTORE, 0)) {
noconflict(as, ref, IR_XSTORE, 2)) {
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
@@ -815,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest);
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
checkmclim(as);
emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */
}
@@ -857,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
emit_rma(as, XO_MOVSD, bias, k);
checkmclim(as);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return;
} else { /* Integer to FP conversion. */
@@ -1173,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
asm_guardcc(as, CC_E);
else
emit_sjcc(as, CC_E, l_end);
checkmclim(as);
if (irt_isnum(kt)) {
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
@@ -1232,7 +1237,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
#endif
}
emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64
if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key);
@@ -1259,6 +1263,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
checkmclim(as);
emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@@ -1276,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
} else {
emit_rr(as, XO_MOV, tmp, key);
#if LJ_GC64
checkmclim(as);
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 32);
@@ -1373,24 +1377,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
if (ir->o == IR_UREFC)
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
} else {
else
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
emit_i8(as, 0);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
}
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, uv, o);
} else {
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
@@ -1547,6 +1558,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
checkmclim(as);
asm_fuseahuref(as, ir->op1, RSET_GPR);
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@@ -1594,6 +1606,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t));
checkmclim(as);
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else

src/lj_cparse.c

@@ -1766,9 +1766,11 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
cp_check(cp, '(');
if (cp->tok == CTOK_IDENT) {
if (cp_str_is(cp->str, "push")) {
if (cp->curpack < CPARSE_MAX_PACKSTACK) {
if (cp->curpack < CPARSE_MAX_PACKSTACK-1) {
cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
cp->curpack++;
} else {
cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS);
}
} else if (cp_str_is(cp->str, "pop")) {
if (cp->curpack > 0) cp->curpack--;
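
The push branch had an off-by-one: with curpack == CPARSE_MAX_PACKSTACK-1 the old bound still admitted a write to packstack[CPARSE_MAX_PACKSTACK], one slot past the array, and overflow was silently ignored rather than raised. A standalone sketch of the two bounds; the stack depth here is illustrative, not the constant from lj_cparse.c:

#include <stdio.h>

#define CPARSE_MAX_PACKSTACK 7  /* illustrative stack depth */

int main(void)
{
  int packstack[CPARSE_MAX_PACKSTACK];
  int curpack = CPARSE_MAX_PACKSTACK - 1;   /* stack already full */
  if (curpack < CPARSE_MAX_PACKSTACK)       /* old check: passes */
    printf("old: would write packstack[%d], one past the end\n", curpack+1);
  if (curpack < CPARSE_MAX_PACKSTACK - 1)   /* new check: rejects */
    packstack[++curpack] = 0;
  else
    printf("new: throw LJ_ERR_XLEVELS\n");
  return 0;
}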

src/lj_def.h

@@ -267,12 +267,8 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
#else
unsigned char _BitScanForward(unsigned long *, unsigned long);
unsigned char _BitScanReverse(unsigned long *, unsigned long);
unsigned char _BitScanForward64(unsigned long *, uint64_t);
unsigned char _BitScanReverse64(unsigned long *, uint64_t);
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
{
@@ -284,6 +280,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
}
#if defined(_M_X64) || defined(_M_ARM64)
unsigned char _BitScanForward64(unsigned long *, uint64_t);
unsigned char _BitScanReverse64(unsigned long *, uint64_t);
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
{
unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
@@ -294,6 +296,7 @@ static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
}
#endif
#endif
unsigned long _byteswap_ulong(unsigned long);
uint64_t _byteswap_uint64(uint64_t);

src/lj_opt_fold.c

@@ -2134,8 +2134,26 @@ LJFOLDX(lj_opt_fwd_uload)
LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_alen)
/* Try to merge UREFO/UREFC into referenced instruction. */
static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
{
if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
/* Might be pointing to some other coroutine's stack.
** And GC might shrink said stack, thereby repointing the upvalue.
** GC might even collect said coroutine, thereby closing the upvalue.
*/
if (gcstep_barrier(J, ref))
return EMITFOLD; /* So cannot merge. */
/* Current fins wants a check, but ir doesn't have one. */
if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
irt_type(ir->t) == IRT_IGC)
ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */
}
return ref; /* Not a TRef, but the caller doesn't care. */
}
/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** So CSE is ok for them, except for guarded UREFO across a GC step.
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
@@ -2153,9 +2171,7 @@ LJFOLDF(cse_uref)
if (irref_isk(ir->op1)) {
GCfunc *fn2 = ir_kfunc(IR(ir->op1));
if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
break;
return ref;
return merge_uref(J, ref, ir);
}
}
ref = ir->prev;
@@ -2164,6 +2180,24 @@ LJFOLDF(cse_uref)
return EMITFOLD;
}
/* Custom CSE for UREFO. */
LJFOLD(UREFO any any)
LJFOLDF(cse_urefo)
{
if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
IRRef ref = J->chain[IR_UREFO];
IRRef lim = fins->op1;
IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
while (ref > lim) {
IRIns *ir = IR(ref);
if (ir->op12 == op12)
return merge_uref(J, ref, ir);
ref = ir->prev;
}
}
return EMITFOLD;
}
LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)
@@ -2384,14 +2418,9 @@ LJFOLDF(fold_base)
/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
TRef tr = lj_opt_cse(J);
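
cse_urefo walks the UREFO chain comparing both operands at once through the packed op12 word, and merge_uref may then upgrade the matched instruction in place from the unguarded IRT_IGC form to the guarded IRT_PGC one (the `ir->t.irt += IRT_PGC-IRT_IGC` line) instead of emitting a second ref. A sketch of the packed compare; the union layout is simplified from lj_ir.h and assumes a little-endian host (LuaJIT itself handles byte order with LJ_ENDIAN_LOHI):

#include <stdint.h>
#include <stdio.h>

typedef uint16_t IRRef1;
typedef uint32_t IRRef2;

/* Two 16-bit operands overlay one 32-bit word, so a single compare of
** op12 matches op1 and op2 together, as in the cse_urefo loop above. */
typedef union {
  struct { IRRef1 op1, op2; } op;
  IRRef2 op12;
} IROp12;

int main(void)
{
  IROp12 ins = { .op = { 100, (7 << 8) | 0x2a } };  /* func ref, uv idx+hash */
  IRRef2 key = (IRRef2)100 + ((IRRef2)((7 << 8) | 0x2a) << 16);
  printf("%s\n", ins.op12 == key ? "CSE hit" : "miss");
  return 0;
}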

src/lj_opt_mem.c

@@ -466,18 +466,23 @@ doemit:
*/
static AliasRet aa_uref(IRIns *refa, IRIns *refb)
{
if (refa->o != refb->o)
return ALIAS_NO; /* Different UREFx type. */
if (refa->op1 == refb->op1) { /* Same function. */
if (refa->op2 == refb->op2)
return ALIAS_MUST; /* Same function, same upvalue idx. */
else
return ALIAS_NO; /* Same function, different upvalue idx. */
} else { /* Different functions, check disambiguation hash values. */
if (((refa->op2 ^ refb->op2) & 0xff))
if (((refa->op2 ^ refb->op2) & 0xff)) {
return ALIAS_NO; /* Upvalues with different hash values cannot alias. */
else
return ALIAS_MAY; /* No conclusion can be drawn for same hash value. */
} else if (refa->o != refb->o) {
/* Different UREFx type, but need to confirm the UREFO really is open. */
if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
return ALIAS_NO;
} else {
/* No conclusion can be drawn for same hash value and same UREFx type. */
return ALIAS_MAY;
}
}
}
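
Reordered this way, the cheap disambiguation-hash test runs first, and the UREFO-vs-UREFC case only answers ALIAS_NO after promoting the open ref to the guarded IRT_PGC type, so the "really is open" assumption is enforced at runtime rather than trusted. The resulting decision order, paraphrased as comments:

/* aa_uref after this change (paraphrase, not upstream documentation):
**   same op1, same op2              -> ALIAS_MUST  (same upvalue)
**   same op1, different op2         -> ALIAS_NO    (different upvalue)
**   different dhash byte (op2&0xff) -> ALIAS_NO    (cannot be the same uv)
**   same hash, UREFO vs UREFC       -> ALIAS_NO, but only sound while the
**       UREFO is open, so its type is bumped IRT_IGC to IRT_PGC to force
**       the runtime open-check (cf. merge_uref in lj_opt_fold.c above).
**   same hash, same UREFx kind      -> ALIAS_MAY
*/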

src/lj_record.c

@@ -976,6 +976,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
J->retdepth++;
J->needsnap = 1;
J->scev.idx = REF_NIL;
lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
/* Shift result slots up and clear the slots of the new frame below. */
memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
@@ -1772,12 +1773,12 @@ noconstify:
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
if (!uvp->closed) {
uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
/* In current stack? */
if (uvval(uvp) >= tvref(J->L->stack) &&
uvval(uvp) < tvref(J->L->maxstack)) {
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
if (slot >= 0) { /* Aliases an SSA slot? */
uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv));
emitir(IRTG(IR_EQ, IRT_PGC),
REF_BASE,
emitir(IRT(IR_ADD, IRT_PGC), uref,
@@ -1792,12 +1793,21 @@ noconstify:
}
}
}
/* IR_UREFO+IRT_IGC is not checked for open-ness at runtime.
** Always marked as a guard, since it might get promoted to IRT_PGC later.
*/
uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv);
uref = tref_ref(uref);
emitir(IRTG(IR_UGT, IRT_PGC),
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
} else {
/* If fn is constant, then so is the GCupval*, and the upvalue cannot
** transition back to open, so no guard is required in this case.
*/
IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC;
uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv));
needbarrier = 1;
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
}
if (val == 0) { /* Upvalue load */
IRType t = itype2irt(uvval(uvp));
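
The IR_UGT guard recorded above is a single unsigned compare that rules out aliasing with any tracked stack slot in both directions at once. A standalone sketch of why one compare suffices:

#include <stdint.h>
#include <stdio.h>

/* If uv sits below base, the unsigned subtraction wraps to a huge value;
** if it sits above the live slots, the difference exceeds the limit.
** Either way the open upvalue cannot alias any of the nslots tracked
** 8-byte stack slots, which is all the guard needs to prove. */
static int cannot_alias(uintptr_t uv, uintptr_t base, uint32_t nslots)
{
  return uv - base > (uintptr_t)nslots * 8;
}

int main(void)
{
  uintptr_t base = 0x10000;
  printf("%d %d %d\n",
         cannot_alias(base - 16, base, 4),   /* below the frame: 1 */
         cannot_alias(base + 8,  base, 4),   /* aliases a slot:  0 */
         cannot_alias(base + 64, base, 4));  /* above the frame: 1 */
  return 0;
}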

src/lj_state.c

@@ -350,8 +350,11 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
lj_assertG(L != mainthread(g), "free of main thread");
if (obj2gco(L) == gcref(g->cur_L))
setgcrefnull(g->cur_L);
lj_func_closeuv(L, tvref(L->stack));
lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
if (gcref(L->openupval) != NULL) {
lj_func_closeuv(L, tvref(L->stack));
lj_trace_abort(g); /* For aa_uref soundness. */
lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
}
lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
lj_mem_freet(g, L);
}

src/vm_arm.dasc

@@ -1195,8 +1195,11 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
| ldr RB, L->maxstack
| add INS, BASE, NARGS8:RC
| ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
| cmp NARGS8:RC, #8
| cmphs RB, INS
| blo ->fff_fallback
| tst RA, #HOOK_ACTIVE // Remember active hook before pcall.
| mov RB, BASE
@@ -1207,7 +1210,11 @@ static void build_subroutines(BuildCtx *ctx)
| b ->vm_call_dispatch
|
|.ffunc_2 xpcall
| ldr RB, L->maxstack
| add INS, BASE, NARGS8:RC
| ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
| cmp RB, INS
| blo ->fff_fallback
| checkfunc CARG4, ->fff_fallback // Traceback must be a function.
| mov RB, BASE
| strd CARG12, [BASE, #8] // Swap function and traceback.
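
The same prologue recurs in every vm_*.dasc below: before building the pcall/xpcall frame, the interpreter compares BASE + NARGS8:RC (the argument count premultiplied by the 8-byte slot size) against L->maxstack and branches to the fff_fallback handler on overflow. At C level the stack check amounts to this sketch:

#include <stdint.h>

typedef struct TValue { uint64_t u64; } TValue;  /* 8-byte stack slot */

/* nargs8 follows the NARGS8:RC convention: number of arguments * 8.
** The frame fits if the byte just past the last argument is still
** within the stack area bounded by L->maxstack. */
int pcall_stack_ok(TValue *base, uint32_t nargs8, TValue *maxstack)
{
  return (char *)base + nargs8 <= (char *)maxstack;
}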

src/vm_arm64.dasc

@@ -1211,6 +1211,10 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
| ldr TMP1, L->maxstack
| add TMP2, BASE, NARGS8:RC
| cmp TMP1, TMP2
| blo ->fff_fallback
| cmp NARGS8:RC, #8
| ldrb TMP0w, GL->hookmask
| blo ->fff_fallback
@@ -1230,6 +1234,10 @@ static void build_subroutines(BuildCtx *ctx)
| b ->vm_call_dispatch
|
|.ffunc xpcall
| ldr TMP1, L->maxstack
| add TMP2, BASE, NARGS8:RC
| cmp TMP1, TMP2
| blo ->fff_fallback
| ldp CARG1, CARG2, [BASE]
| ldrb TMP0w, GL->hookmask
| subs NARGS8:TMP1, NARGS8:RC, #16

src/vm_mips.dasc

@@ -1374,9 +1374,13 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
| lw TMP1, L->maxstack
| addu TMP2, BASE, NARGS8:RC
| lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
| beqz NARGS8:RC, ->fff_fallback
| move TMP2, BASE
|. sltu AT, TMP1, TMP2
| bnez AT, ->fff_fallback
|. move TMP2, BASE
| addiu BASE, BASE, 8
| // Remember active hook before pcall.
| srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
@@ -1386,8 +1390,12 @@ static void build_subroutines(BuildCtx *ctx)
|. addiu NARGS8:RC, NARGS8:RC, -8
|
|.ffunc xpcall
| lw TMP1, L->maxstack
| addu TMP2, BASE, NARGS8:RC
| sltiu AT, NARGS8:RC, 16
| lw CARG4, 8+HI(BASE)
| sltu TMP1, TMP1, TMP2
| or AT, AT, TMP1
| bnez AT, ->fff_fallback
|. lw CARG3, 8+LO(BASE)
| lw CARG1, LO(BASE)

src/vm_mips64.dasc

@@ -1415,8 +1415,12 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
| ld TMP1, L->maxstack
| daddu TMP2, BASE, NARGS8:RC
| sltu AT, TMP1, TMP2
| bnez AT, ->fff_fallback
|. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
| daddiu NARGS8:RC, NARGS8:RC, -8
| lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
| bltz NARGS8:RC, ->fff_fallback
|. move TMP2, BASE
| daddiu BASE, BASE, 16
@@ -1437,8 +1441,12 @@ static void build_subroutines(BuildCtx *ctx)
|. nop
|
|.ffunc xpcall
| ld TMP1, L->maxstack
| daddu TMP2, BASE, NARGS8:RC
| sltu AT, TMP1, TMP2
| bnez AT, ->fff_fallback
|. ld CARG1, 0(BASE)
| daddiu NARGS8:TMP0, NARGS8:RC, -16
| ld CARG1, 0(BASE)
| ld CARG2, 8(BASE)
| bltz NARGS8:TMP0, ->fff_fallback
|. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)

src/vm_ppc.dasc

@@ -1905,8 +1905,12 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
| lwz TMP1, L->maxstack
| add TMP2, BASE, NARGS8:RC
| cmplwi NARGS8:RC, 8
| lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
| cmplw cr1, TMP1, TMP2
| cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| blt ->fff_fallback
| mr TMP2, BASE
| la BASE, 8(BASE)
@@ -1917,14 +1921,19 @@ static void build_subroutines(BuildCtx *ctx)
| b ->vm_call_dispatch
|
|.ffunc xpcall
| lwz TMP1, L->maxstack
| add TMP2, BASE, NARGS8:RC
| cmplwi NARGS8:RC, 16
| lwz CARG3, 8+WORD_HI(BASE)
| cmplw cr1, TMP1, TMP2
|.if FPU
| lfd FARG2, 8(BASE)
| cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd FARG1, 0(BASE)
|.else
| lwz CARG1, 0(BASE)
| lwz CARG2, 4(BASE)
| cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lwz CARG4, 12(BASE)
|.endif
| blt ->fff_fallback

src/vm_x64.dasc

@@ -1463,6 +1463,9 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc_1 pcall
| mov L:RB, SAVE_L
| lea RA, [BASE+NARGS:RD*8]
| cmp RA, L:RB->maxstack; ja ->fff_fallback
| lea RA, [BASE+16]
| sub NARGS:RDd, 1
| mov PCd, 16+FRAME_PCALL
@@ -1481,6 +1484,9 @@ static void build_subroutines(BuildCtx *ctx)
| jmp ->vm_call_dispatch
|
|.ffunc_2 xpcall
| mov L:RB, SAVE_L
| lea RA, [BASE+NARGS:RD*8]
| cmp RA, L:RB->maxstack; ja ->fff_fallback
| mov LFUNC:RA, [BASE+8]
| checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
| mov LFUNC:RB, [BASE] // Swap function and traceback.

src/vm_x86.dasc

@@ -1369,7 +1369,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov LFUNC:RB, [RA-8]
| add NARGS:RD, 1
| // This is fragile. L->base must not move, KBASE must always be defined.
|.if x64
|.if X64
| cmp KBASEa, rdx // Continue with CALLT if flag set.
|.else
| cmp KBASE, BASE // Continue with CALLT if flag set.
@@ -1793,6 +1793,9 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc_1 pcall
| mov L:RB, SAVE_L
| lea RA, [BASE+NARGS:RD*8]
| cmp RA, L:RB->maxstack; ja ->fff_fallback
| lea RA, [BASE+8]
| sub NARGS:RD, 1
| mov PC, 8+FRAME_PCALL
@@ -1804,6 +1807,9 @@ static void build_subroutines(BuildCtx *ctx)
| jmp ->vm_call_dispatch
|
|.ffunc_2 xpcall
| mov L:RB, SAVE_L
| lea RA, [BASE+NARGS:RD*8]
| cmp RA, L:RB->maxstack; ja ->fff_fallback
| cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
| mov RB, [BASE+4] // Swap function and traceback.
| mov [BASE+12], RB