Skip to content

Commit

Permalink
Checks for separated vector registers (#275)
Browse files Browse the repository at this point in the history
  • Loading branch information
vorosl authored Nov 21, 2024
1 parent 9dc3a4c commit 2cf8a24
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 43 deletions.
152 changes: 131 additions & 21 deletions sljit_src/sljitLir.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@
/* Decoders for the packed scratches / saveds arguments of sljit_emit_enter.
   Each argument carries several 8 bit register counts:
     bits 0-7   general purpose registers (ENTER_GET_REGS)
     bits 8-15  float registers (ENTER_GET_FLOAT_REGS)
     bits 16-23 vector registers (ENTER_GET_VECTOR_REGS, only defined
                when SLJIT_SEPARATE_VECTOR_REGISTERS is enabled) */
#define ENTER_GET_REGS(regs) ((regs) & 0xff)
#define ENTER_GET_FLOAT_REGS(regs) (((regs) >> 8) & 0xff)
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
#define ENTER_GET_VECTOR_REGS(regs) (((regs) >> 16) & 0xff)
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
/* Extracts the low two bits of the options argument. */
#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3)

/* Getters for simd operations, which returns with log2(size). */
Expand Down Expand Up @@ -312,6 +315,12 @@
(((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \
(fsaveds)) * SSIZE_OF(type))

#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
/* Computes the space required for preserving vector registers:
   (number of scratch registers at or above SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS
    + number of saved registers) * SSIZE_OF(type).
   Scratch counts below SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS contribute 0.
   Every parameter is parenthesized so that any expression can be passed
   safely. Note: vscratches is evaluated twice, so it must not have
   side effects. */
#define GET_SAVED_VECTOR_REGISTERS_SIZE(vscratches, vsaveds, type) \
	((((vscratches) < SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS ? 0 : ((vscratches) - SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS)) + \
		(vsaveds)) * SSIZE_OF(type))
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */

/* Adds SLJIT_LOCALS_OFFSET to the offset i when the operand p is exactly
   SLJIT_MEM1(SLJIT_SP); any other operand leaves i untouched.
   NOTE(review): the expansion is a bare if statement that already ends
   with a semicolon, so it must be used as a standalone statement and
   never directly before an else branch. */
#define ADJUST_LOCAL_OFFSET(p, i) \
if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
(i) += SLJIT_LOCALS_OFFSET;
Expand Down Expand Up @@ -504,6 +513,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo
compiler->saveds = -1;
compiler->fscratches = -1;
compiler->fsaveds = -1;
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
compiler->vscratches = -1;
compiler->vsaveds = -1;
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
compiler->local_size = -1;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
Expand Down Expand Up @@ -767,6 +780,10 @@ static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler,
compiler->saveds = ENTER_GET_REGS(saveds);
compiler->fscratches = ENTER_GET_FLOAT_REGS(scratches);
compiler->fsaveds = ENTER_GET_FLOAT_REGS(saveds);
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
compiler->vscratches = ENTER_GET_VECTOR_REGS(scratches);
compiler->vsaveds = ENTER_GET_VECTOR_REGS(saveds);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->last_return = args & SLJIT_ARG_MASK;
compiler->logical_local_size = local_size;
Expand All @@ -785,6 +802,10 @@ static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler,
compiler->saveds = ENTER_GET_REGS(saveds);
compiler->fscratches = ENTER_GET_FLOAT_REGS(scratches);
compiler->fsaveds = ENTER_GET_FLOAT_REGS(saveds);
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
compiler->vscratches = ENTER_GET_VECTOR_REGS(scratches);
compiler->vsaveds = ENTER_GET_VECTOR_REGS(saveds);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->last_return = args & SLJIT_ARG_MASK;
compiler->logical_local_size = local_size;
Expand Down Expand Up @@ -982,10 +1003,23 @@ static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p
function_check_is_freg(compiler, (fr), (is_32))

static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32);
/* Vector register check for the variant of the argument checkers that
   carries an is_32 flag. With separate vector registers the check is
   forwarded to function_check_is_vreg, otherwise vector registers are
   validated as float registers. */
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
#define FUNCTION_CHECK_IS_VREG(vr, is_32) \
function_check_is_vreg(compiler, (vr), (is_32))

/* NOTE(review): only this prototype of the three argument form is visible
   here; confirm that a matching definition exists for this configuration. */
static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 is_32);
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
#define FUNCTION_CHECK_IS_VREG(vr, is_32) \
function_check_is_freg(compiler, (vr), (is_32))
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */

/* Validates a float operand (p, i) through function_fcheck.
   The expansion already contains the terminating semicolon. */
#define FUNCTION_FCHECK(p, i, is_32) \
CHECK_ARGUMENT(function_fcheck(compiler, (p), (i), (is_32)));

/* In this variant the vector operand check is an alias of the float
   operand check, regardless of SLJIT_SEPARATE_VECTOR_REGISTERS. */
#define FUNCTION_VCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32)

/* Float-or-vector operand check; also an alias of the float check here. */
#define FUNCTION_FVCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32)

static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 is_32)
{
if (compiler->scratches == -1)
Expand All @@ -1010,6 +1044,23 @@ static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s
|| (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
|| (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
/* The is_32 argument is accepted for call site compatibility and ignored. */
#define FUNCTION_CHECK_IS_VREG(vr, is_32) \
	function_check_is_vreg(compiler, (vr))

/* Returns 1 when vr is a vector register usable with the current
   register counts of the compiler, and 0 otherwise. Accepted ranges:
     - scratch registers: SLJIT_VR0 upwards, compiler->vscratches entries
     - saved registers: SLJIT_VS0 downwards, compiler->vsaveds entries
     - temporary registers: SLJIT_TMP_VREGISTER_BASE upwards,
       SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS entries */
static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr)
{
	/* A scratches value of -1 rejects every register
	   (presumably the register counts have not been set yet). */
	if (compiler->scratches == -1)
		return 0;

	if (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches))
		return 1;

	if (vr <= SLJIT_VS0 && vr > (SLJIT_VS0 - compiler->vsaveds))
		return 1;

	if (vr >= SLJIT_TMP_VREGISTER_BASE
			&& vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS))
		return 1;

	return 0;
}
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
/* Without separate vector registers, vector registers are checked
   as float registers. */
#define FUNCTION_CHECK_IS_VREG(vr, is_32) \
	function_check_is_freg(compiler, (vr))
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */

#define FUNCTION_FCHECK(p, i, is_32) \
CHECK_ARGUMENT(function_fcheck(compiler, (p), (i)));
Expand All @@ -1027,6 +1078,28 @@ static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, s
return function_check_src_mem(compiler, p, i);
}

#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
/* Validates a vector operand (p, i); the is_32 argument is ignored. */
#define FUNCTION_VCHECK(p, i, is_32) \
	CHECK_ARGUMENT(function_vcheck(compiler, (p), (i)))

/* Checks an operand which is either a vector register or a memory
   reference. A register operand is valid only when its offset i is 0;
   everything else is passed to function_check_src_mem. Returns non-zero
   when the operand is accepted. */
static sljit_s32 function_vcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
	sljit_s32 is_scratch_vreg;
	sljit_s32 is_saved_vreg;
	sljit_s32 is_temporary_vreg;

	/* A scratches value of -1 rejects every operand. */
	if (compiler->scratches == -1)
		return 0;

	is_scratch_vreg = (p >= SLJIT_VR0 && p < SLJIT_VR0 + compiler->vscratches);
	is_saved_vreg = (p <= SLJIT_VS0 && p > (SLJIT_VS0 - compiler->vsaveds));
	is_temporary_vreg = (p >= SLJIT_TMP_VREGISTER_BASE
		&& p < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS));

	if (!is_scratch_vreg && !is_saved_vreg && !is_temporary_vreg)
		return function_check_src_mem(compiler, p, i);

	/* Plain registers must not carry an offset. */
	return (i == 0);
}

/* Accepts an operand which passes either the float or the vector check. */
#define FUNCTION_FVCHECK(p, i, is_32) \
	CHECK_ARGUMENT(function_fcheck(compiler, (p), (i)) || function_vcheck(compiler, (p), (i)));
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
/* Without separate vector registers both checks are the float check. */
#define FUNCTION_VCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32)
#define FUNCTION_FVCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32)
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */

#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */

#endif /* SLJIT_ARGUMENT_CHECKS */
Expand Down Expand Up @@ -1280,7 +1353,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
sljit_s32 real_saveds = ENTER_GET_REGS(saveds);
sljit_s32 real_fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 real_fsaveds = ENTER_GET_FLOAT_REGS(saveds);
#endif
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
sljit_s32 real_vscratches = ENTER_GET_VECTOR_REGS(scratches);
sljit_s32 real_vsaveds = ENTER_GET_VECTOR_REGS(saveds);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
#endif /* SLJIT_ARGUMENT_CHECKS */
SLJIT_UNUSED_ARG(compiler);

#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
Expand All @@ -1290,20 +1367,25 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0);
}
CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds);
CHECK_ARGUMENT((scratches & ~0xffff) == 0 && (saveds & ~0xffff) == 0);
CHECK_ARGUMENT((scratches & ~0xffffff) == 0 && (saveds & ~0xffffff) == 0);
CHECK_ARGUMENT(real_scratches >= 0 && real_scratches <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(real_saveds >= 0 && real_saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS);
CHECK_ARGUMENT(real_scratches + real_saveds <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(real_fscratches >= 0 && real_fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(real_fsaveds >= 0 && real_fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS);
CHECK_ARGUMENT(real_fscratches + real_fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
CHECK_ARGUMENT(real_vscratches >= 0 && real_vscratches <= SLJIT_NUMBER_OF_VECTOR_REGISTERS);
CHECK_ARGUMENT(real_vsaveds >= 0 && real_vsaveds <= SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS);
CHECK_ARGUMENT(real_vscratches + real_vsaveds <= SLJIT_NUMBER_OF_VECTOR_REGISTERS);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) <= SLJIT_ARG_TYPE_F32);
CHECK_ARGUMENT(function_check_arguments(arg_types, real_scratches,
(options & SLJIT_ENTER_REG_ARG) ? 0 : real_saveds, real_fscratches));

compiler->last_flags = 0;
#endif
#endif /* SLJIT_ARGUMENT_CHECKS */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]);
Expand Down Expand Up @@ -1335,8 +1417,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
}
#endif /* !SLJIT_CONFIG_X86 */

#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, vscratches:%d, vsaveds:%d, local_size:%d\n",
ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds),
ENTER_GET_VECTOR_REGS(scratches), ENTER_GET_VECTOR_REGS(saveds), local_size);
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n",
ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), local_size);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
}
#endif
CHECK_RETURN_OK;
Expand All @@ -1351,6 +1439,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
sljit_s32 real_saveds = ENTER_GET_REGS(saveds);
sljit_s32 real_fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 real_fsaveds = ENTER_GET_FLOAT_REGS(saveds);
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
sljit_s32 real_vscratches = ENTER_GET_VECTOR_REGS(scratches);
sljit_s32 real_vsaveds = ENTER_GET_VECTOR_REGS(saveds);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
#endif
SLJIT_UNUSED_ARG(compiler);

Expand All @@ -1368,6 +1460,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
CHECK_ARGUMENT(real_fscratches >= 0 && real_fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(real_fsaveds >= 0 && real_fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS);
CHECK_ARGUMENT(real_fscratches + real_fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
CHECK_ARGUMENT(real_vscratches >= 0 && real_vscratches <= SLJIT_NUMBER_OF_VECTOR_REGISTERS);
CHECK_ARGUMENT(real_vsaveds >= 0 && real_vsaveds <= SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS);
CHECK_ARGUMENT(real_vscratches + real_vsaveds <= SLJIT_NUMBER_OF_VECTOR_REGISTERS);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64);
CHECK_ARGUMENT(function_check_arguments(arg_types, real_scratches,
Expand Down Expand Up @@ -1406,8 +1503,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
}
#endif /* !SLJIT_CONFIG_X86 */

#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, vscratches:%d, vsaveds:%d, local_size:%d\n",
ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds),
ENTER_GET_VECTOR_REGS(scratches), ENTER_GET_VECTOR_REGS(saveds), local_size);
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n",
ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), local_size);
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
}
#endif
CHECK_RETURN_OK;
Expand Down Expand Up @@ -1878,8 +1981,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 t
if (type == SLJIT_GP_REGISTER) {
CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS)
|| (reg >= SLJIT_TMP_REGISTER_BASE && reg < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS)));
} else {
CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6)));
}
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
else if (((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))) {
CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_VECTOR_REGISTERS)
|| (reg >= SLJIT_TMP_VREGISTER_BASE && reg < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS)));
}
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
else {
CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6) || (type & (3 << 12)) || (type & (4 << 12)) || (type & (5 << 12)) || (type & (6 << 12))));
CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS)
|| (reg >= SLJIT_TMP_FREGISTER_BASE && reg < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)));
}
Expand Down Expand Up @@ -1936,8 +2046,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_MOV_F64 && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_ABS_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_FCHECK(src, srcw, op & SLJIT_32);
FUNCTION_FCHECK(dst, dstw, op & SLJIT_32);
FUNCTION_FVCHECK(src, srcw, op & SLJIT_32);
FUNCTION_FVCHECK(dst, dstw, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
Expand Down Expand Up @@ -2767,8 +2877,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_com
CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= (srcdst & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0));
FUNCTION_FCHECK(srcdst, srcdstw, 0);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0));
FUNCTION_VCHECK(srcdst, srcdstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
Expand Down Expand Up @@ -2808,13 +2918,13 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct slj
CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0);
CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0));

if (type & SLJIT_SIMD_FLOAT) {
if (src == SLJIT_IMM) {
CHECK_ARGUMENT(srcw == 0);
} else {
FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
FUNCTION_VCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
}
} else if (src != SLJIT_IMM) {
FUNCTION_CHECK_DST(src, srcw);
Expand Down Expand Up @@ -2859,11 +2969,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct slji
CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(!(type & SLJIT_32) || SLJIT_SIMD_GET_ELEM_SIZE(type) <= 2);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0));
CHECK_ARGUMENT(lane_index >= 0 && lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type))));

if (type & SLJIT_SIMD_FLOAT) {
FUNCTION_FCHECK(srcdst, srcdstw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
FUNCTION_VCHECK(srcdst, srcdstw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
} else if ((type & SLJIT_SIMD_STORE) || srcdst != SLJIT_IMM) {
FUNCTION_CHECK_DST(srcdst, srcdstw);
}
Expand Down Expand Up @@ -2907,8 +3017,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struc
CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0);
CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(src, 0));
CHECK_ARGUMENT(src_lane_index >= 0 && src_lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type))));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
Expand Down Expand Up @@ -2945,8 +3055,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_
CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_ELEM2_SIZE(type));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0));
FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0));
FUNCTION_VCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
Expand Down Expand Up @@ -2983,7 +3093,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_co
CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_32)) == SLJIT_SIMD_STORE);
CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0));
FUNCTION_CHECK_DST(dst, dstw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
Expand Down Expand Up @@ -3020,9 +3130,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_op2(struct sljit_com
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_OPCODE(type) != SLJIT_SIMD_OP2_SHUFFLE || (SLJIT_SIMD_GET_ELEM_SIZE(type) == 0 && !(type & SLJIT_SIMD_FLOAT)));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= (src2 & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_vreg, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src1_vreg, 0));
FUNCTION_FCHECK(src2, src2w, 0);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(dst_vreg, 0));
CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(src1_vreg, 0));
FUNCTION_VCHECK(src2, src2w, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
Expand Down
14 changes: 14 additions & 0 deletions sljit_src/sljitLir.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,12 @@ struct sljit_compiler {
sljit_s32 fscratches;
/* Available float saved registers. */
sljit_s32 fsaveds;
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
/* Available vector scratch registers. */
sljit_s32 vscratches;
/* Available vector saved registers. */
sljit_s32 vsaveds;
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
/* Local stack size. */
sljit_s32 local_size;
/* Maximum code size. */
Expand Down Expand Up @@ -894,6 +900,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);
specified by SLJIT_ENTER_FLOAT. */
#define SLJIT_ENTER_FLOAT(regs) ((regs) << 8)

/* Vector scratch and saved registers can be specified
   by SLJIT_ENTER_VECTOR. When SLJIT_SEPARATE_VECTOR_REGISTERS
   is enabled, the count is shifted left by 16 bits. Otherwise the
   macro expands to SLJIT_ENTER_FLOAT(regs), so the vector count
   occupies the same bits as the float register count. */
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
#define SLJIT_ENTER_VECTOR(regs) ((regs) << 16)
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
#define SLJIT_ENTER_VECTOR(regs) SLJIT_ENTER_FLOAT(regs)
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */

/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
#define SLJIT_MAX_LOCAL_SIZE 1048576

Expand Down
Loading

0 comments on commit 2cf8a24

Please sign in to comment.