Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

loongarch: Optimize jump maximum offset prediction & update action co… #273

Merged
merged 1 commit into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ jobs:
run: |
sudo apt update
sudo apt install -y make wget ninja-build
wget https://github.com/loongson/build-tools/releases/download/2024.06.01/x86_64-cross-tools-loongarch64-binutils_2.42-gcc_14.1.0-glibc_2.39.tar.xz
tar -xvf x86_64-cross-tools-loongarch64-binutils_2.42-gcc_14.1.0-glibc_2.39.tar.xz
wget https://github.com/loongson/build-tools/releases/download/2024.11.01/x86_64-cross-tools-loongarch64-binutils_2.43.1-gcc_14.2.0-glibc_2.40.tar.xz
tar -xvf x86_64-cross-tools-loongarch64-binutils_2.43.1-gcc_14.2.0-glibc_2.40.tar.xz
- name: Install qemu
run: |
wget https://download.qemu.org/qemu-9.0.2.tar.xz
Expand All @@ -248,7 +248,7 @@ jobs:
make -j4
- name: Build and test
env:
CROSS_COMPILER: ./cross-tools/bin/loongarch64-unknown-linux-gnu-gcc-14.1.0
CROSS_COMPILER: ./cross-tools/bin/loongarch64-unknown-linux-gnu-gcc-14.2.0
CFLAGS: -march=la464
EXTRA_LDFLAGS: -static
run: |
Expand Down
72 changes: 44 additions & 28 deletions sljit_src/sljitNativeLOONGARCH_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,96 +411,105 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_sw diff;
sljit_uw target_addr;
sljit_ins *inst;

inst = (sljit_ins *)jump->addr;
sljit_uw jump_addr = (sljit_uw)code_ptr;
sljit_uw orig_addr = jump->addr;
SLJIT_UNUSED_ARG(executable_offset);

jump->addr = jump_addr;
if (jump->flags & SLJIT_REWRITABLE_JUMP)
goto exit;

if (jump->flags & JUMP_ADDR)
target_addr = jump->u.target;
else {
SLJIT_ASSERT(jump->u.label != NULL);
target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

if (jump->u.label->size > orig_addr)
jump_addr = (sljit_uw)(code + orig_addr);
}

diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

if (jump->flags & IS_COND) {
diff += SSIZE_OF(ins);

if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
inst--;
inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
code_ptr--;
code_ptr[0] = (code_ptr[0] & 0xfc0003ff) ^ 0x4000000;
jump->flags |= PATCH_B;
jump->addr = (sljit_uw)inst;
return inst;
jump->addr = (sljit_uw)code_ptr;
return code_ptr;
}

diff -= SSIZE_OF(ins);
}

if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
if (jump->flags & IS_COND) {
inst[-1] |= (sljit_ins)IMM_I16(2);
code_ptr[-1] |= (sljit_ins)IMM_I16(2);
}

jump->flags |= PATCH_J;
return inst;
return code_ptr;
}

if (diff >= S32_MIN && diff <= S32_MAX) {
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(3);
code_ptr[-1] |= (sljit_ins)IMM_I16(3);

jump->flags |= PATCH_REL32;
inst[1] = inst[0];
return inst + 1;
code_ptr[1] = code_ptr[0];
return code_ptr + 1;
}

if (target_addr <= (sljit_uw)S32_MAX) {
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(3);
code_ptr[-1] |= (sljit_ins)IMM_I16(3);

jump->flags |= PATCH_ABS32;
inst[1] = inst[0];
return inst + 1;
code_ptr[1] = code_ptr[0];
return code_ptr + 1;
}

if (target_addr <= S52_MAX) {
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(4);
code_ptr[-1] |= (sljit_ins)IMM_I16(4);

jump->flags |= PATCH_ABS52;
inst[2] = inst[0];
return inst + 2;
code_ptr[2] = code_ptr[0];
return code_ptr + 2;
}

exit:
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(5);
inst[3] = inst[0];
return inst + 3;
code_ptr[-1] |= (sljit_ins)IMM_I16(5);
code_ptr[3] = code_ptr[0];
return code_ptr + 3;
}

static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_uw addr;
sljit_uw jump_addr = (sljit_uw)code_ptr;
sljit_sw diff;
SLJIT_UNUSED_ARG(executable_offset);

SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
if (jump->flags & JUMP_ADDR)
addr = jump->u.target;
else
else {
addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
if (jump->u.label->size > jump->addr)
jump_addr = (sljit_uw)(code + jump->addr);
}

diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

if (diff >= S32_MIN && diff <= S32_MAX) {
SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
Expand Down Expand Up @@ -623,6 +632,10 @@ static void reduce_code_size(struct sljit_compiler *compiler)
} else {
/* Unit size: instruction. */
diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
if (jump->u.label->size > jump->addr) {
SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
diff -= (sljit_sw)size_reduce;
}

if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
total_size = 0;
Expand All @@ -641,6 +654,10 @@ static void reduce_code_size(struct sljit_compiler *compiler)
if (!(jump->flags & JUMP_ADDR)) {
/* Real size minus 1. Unit size: instruction. */
diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
if (jump->u.label->size > jump->addr) {
SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
diff -= (sljit_sw)size_reduce;
}

if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
total_size = 1;
Expand Down Expand Up @@ -716,8 +733,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
if (next_min_addr == next_jump_addr) {
if (!(jump->flags & JUMP_MOV_ADDR)) {
word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
jump->addr = (sljit_uw)code_ptr;
code_ptr = detect_jump_type(jump, code, executable_offset);
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
} else {
word_count += jump->flags >> JUMP_SIZE_SHIFT;
Expand Down
Loading