Skip to content

Commit

Permalink
loongarch: Optimize jump maximum offset prediction & update action compiler (#273)
Browse files Browse the repository at this point in the history
  • Loading branch information
lrzlin authored Nov 5, 2024
1 parent 666125a commit 446e5c7
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 31 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ jobs:
run: |
sudo apt update
sudo apt install -y make wget ninja-build
wget https://github.com/loongson/build-tools/releases/download/2024.06.01/x86_64-cross-tools-loongarch64-binutils_2.42-gcc_14.1.0-glibc_2.39.tar.xz
tar -xvf x86_64-cross-tools-loongarch64-binutils_2.42-gcc_14.1.0-glibc_2.39.tar.xz
wget https://github.com/loongson/build-tools/releases/download/2024.11.01/x86_64-cross-tools-loongarch64-binutils_2.43.1-gcc_14.2.0-glibc_2.40.tar.xz
tar -xvf x86_64-cross-tools-loongarch64-binutils_2.43.1-gcc_14.2.0-glibc_2.40.tar.xz
- name: Install qemu
run: |
wget https://download.qemu.org/qemu-9.0.2.tar.xz
Expand All @@ -248,7 +248,7 @@ jobs:
make -j4
- name: Build and test
env:
CROSS_COMPILER: ./cross-tools/bin/loongarch64-unknown-linux-gnu-gcc-14.1.0
CROSS_COMPILER: ./cross-tools/bin/loongarch64-unknown-linux-gnu-gcc-14.2.0
CFLAGS: -march=la464
EXTRA_LDFLAGS: -static
run: |
Expand Down
72 changes: 44 additions & 28 deletions sljit_src/sljitNativeLOONGARCH_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,96 +411,105 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
return SLJIT_SUCCESS;
}

/*
 * detect_jump_type: picks an encoding for one jump and returns the advanced
 * instruction pointer.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers. Where two
 * near-identical lines follow each other (for example the two signatures
 * below, or the `inst` / `code_ptr` statement pairs), the first appears to be
 * the removed line and the second its replacement - confirm against the
 * repository before treating this text as compilable code.
 *
 * Parameters (replacement signature):
 *   jump              - jump record; its flags and addr fields are updated here.
 *   code_ptr          - current output position; stored into jump->addr.
 *   code              - base pointer that label sizes are added to.
 *   executable_offset - offset applied when computing executable addresses.
 *
 * The function compares the displacement (or the absolute target address)
 * against a series of ranges, from BRANCH16 up to S52_MAX, sets the matching
 * PATCH_* flag on the jump, and returns a pointer 0 to 3 instruction slots
 * past code_ptr (or one slot before it in the PATCH_B case).
 */
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_sw diff;
sljit_uw target_addr;
sljit_ins *inst;

inst = (sljit_ins *)jump->addr;
sljit_uw jump_addr = (sljit_uw)code_ptr;
/* Keep the incoming jump->addr: it is overwritten just below but is still
   needed for the comparison against the label size. */
sljit_uw orig_addr = jump->addr;
SLJIT_UNUSED_ARG(executable_offset);

jump->addr = jump_addr;
/* Rewritable jumps skip every range check and take the `exit` path. */
if (jump->flags & SLJIT_REWRITABLE_JUMP)
goto exit;

/* Resolve the target: either a fixed address or a label position relative to `code`. */
if (jump->flags & JUMP_ADDR)
target_addr = jump->u.target;
else {
SLJIT_ASSERT(jump->u.label != NULL);
target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

/* When the label size is greater than the original jump address, measure
   the distance from code + orig_addr instead of from code_ptr.
   NOTE(review): presumably this is the "maximum offset prediction" named
   in the commit title (a forward label may still move) - TODO confirm. */
if (jump->u.label->size > orig_addr)
jump_addr = (sljit_uw)(code + orig_addr);
}

/* Displacement from the jump position to the target. */
diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

if (jump->flags & IS_COND) {
/* Test the range with the displacement enlarged by one instruction size;
   the adjustment is undone below if the BRANCH16 range does not fit. */
diff += SSIZE_OF(ins);

if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
/* Step back one instruction and rewrite it in place: the mask keeps bits
   31..26 and 9..0 (clearing bits 25..10) and the XOR flips bit 26.
   NOTE(review): presumably this inverts the branch condition and clears
   its offset field - confirm against the LoongArch encoding. */
inst--;
inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
code_ptr--;
code_ptr[0] = (code_ptr[0] & 0xfc0003ff) ^ 0x4000000;
jump->flags |= PATCH_B;
jump->addr = (sljit_uw)inst;
return inst;
jump->addr = (sljit_uw)code_ptr;
return code_ptr;
}

diff -= SSIZE_OF(ins);
}

/* In each case below a conditional jump first ORs an IMM_I16(n) value into
   the preceding instruction; n grows by one for every extra slot returned. */
if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
if (jump->flags & IS_COND) {
inst[-1] |= (sljit_ins)IMM_I16(2);
code_ptr[-1] |= (sljit_ins)IMM_I16(2);
}

jump->flags |= PATCH_J;
return inst;
return code_ptr;
}

/* Displacement fits the S32 range: the instruction at the current slot is
   copied one slot forward and the returned pointer advances by one. */
if (diff >= S32_MIN && diff <= S32_MAX) {
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(3);
code_ptr[-1] |= (sljit_ins)IMM_I16(3);

jump->flags |= PATCH_REL32;
inst[1] = inst[0];
return inst + 1;
code_ptr[1] = code_ptr[0];
return code_ptr + 1;
}

/* From here on the absolute target address is tested, not the displacement. */
if (target_addr <= (sljit_uw)S32_MAX) {
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(3);
code_ptr[-1] |= (sljit_ins)IMM_I16(3);

jump->flags |= PATCH_ABS32;
inst[1] = inst[0];
return inst + 1;
code_ptr[1] = code_ptr[0];
return code_ptr + 1;
}

if (target_addr <= S52_MAX) {
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(4);
code_ptr[-1] |= (sljit_ins)IMM_I16(4);

jump->flags |= PATCH_ABS52;
inst[2] = inst[0];
return inst + 2;
code_ptr[2] = code_ptr[0];
return code_ptr + 2;
}

/* Fallback, also used by rewritable jumps: no PATCH_* flag is set, the
   instruction is copied three slots forward and the pointer advances by three. */
exit:
if (jump->flags & IS_COND)
inst[-1] |= (sljit_ins)IMM_I16(5);
inst[3] = inst[0];
return inst + 3;
code_ptr[-1] |= (sljit_ins)IMM_I16(5);
code_ptr[3] = code_ptr[0];
return code_ptr + 3;
}

static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_uw addr;
sljit_uw jump_addr = (sljit_uw)code_ptr;
sljit_sw diff;
SLJIT_UNUSED_ARG(executable_offset);

SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
if (jump->flags & JUMP_ADDR)
addr = jump->u.target;
else
else {
addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
if (jump->u.label->size > jump->addr)
jump_addr = (sljit_uw)(code + jump->addr);
}

diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

if (diff >= S32_MIN && diff <= S32_MAX) {
SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
Expand Down Expand Up @@ -623,6 +632,10 @@ static void reduce_code_size(struct sljit_compiler *compiler)
} else {
/* Unit size: instruction. */
diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
if (jump->u.label->size > jump->addr) {
SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
diff -= (sljit_sw)size_reduce;
}

if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
total_size = 0;
Expand All @@ -641,6 +654,10 @@ static void reduce_code_size(struct sljit_compiler *compiler)
if (!(jump->flags & JUMP_ADDR)) {
/* Real size minus 1. Unit size: instruction. */
diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
if (jump->u.label->size > jump->addr) {
SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
diff -= (sljit_sw)size_reduce;
}

if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
total_size = 1;
Expand Down Expand Up @@ -716,8 +733,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
if (next_min_addr == next_jump_addr) {
if (!(jump->flags & JUMP_MOV_ADDR)) {
word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
jump->addr = (sljit_uw)code_ptr;
code_ptr = detect_jump_type(jump, code, executable_offset);
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
} else {
word_count += jump->flags >> JUMP_SIZE_SHIFT;
Expand Down

0 comments on commit 446e5c7

Please sign in to comment.