From 446e5c71fdfc02870110c49f9a20a336e720f3b8 Mon Sep 17 00:00:00 2001 From: lrzlin Date: Tue, 5 Nov 2024 23:09:12 +0800 Subject: [PATCH] loongarch: Optimize jump maximum offset prediction & update action compiler (#273) --- .github/workflows/actions.yml | 6 +-- sljit_src/sljitNativeLOONGARCH_64.c | 72 ++++++++++++++++++----------- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 35aeb7c6..088fec52 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -237,8 +237,8 @@ jobs: run: | sudo apt update sudo apt install -y make wget ninja-build - wget https://github.com/loongson/build-tools/releases/download/2024.06.01/x86_64-cross-tools-loongarch64-binutils_2.42-gcc_14.1.0-glibc_2.39.tar.xz - tar -xvf x86_64-cross-tools-loongarch64-binutils_2.42-gcc_14.1.0-glibc_2.39.tar.xz + wget https://github.com/loongson/build-tools/releases/download/2024.11.01/x86_64-cross-tools-loongarch64-binutils_2.43.1-gcc_14.2.0-glibc_2.40.tar.xz + tar -xvf x86_64-cross-tools-loongarch64-binutils_2.43.1-gcc_14.2.0-glibc_2.40.tar.xz - name: Install qemu run: | wget https://download.qemu.org/qemu-9.0.2.tar.xz @@ -248,7 +248,7 @@ jobs: make -j4 - name: Build and test env: - CROSS_COMPILER: ./cross-tools/bin/loongarch64-unknown-linux-gnu-gcc-14.1.0 + CROSS_COMPILER: ./cross-tools/bin/loongarch64-unknown-linux-gnu-gcc-14.2.0 CFLAGS: -march=la464 EXTRA_LDFLAGS: -static run: | diff --git a/sljit_src/sljitNativeLOONGARCH_64.c b/sljit_src/sljitNativeLOONGARCH_64.c index 1d3f66cd..7a46543b 100644 --- a/sljit_src/sljitNativeLOONGARCH_64.c +++ b/sljit_src/sljitNativeLOONGARCH_64.c @@ -411,14 +411,15 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; - sljit_ins *inst; - - inst = (sljit_ins *)jump->addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; + sljit_uw orig_addr = jump->addr; + SLJIT_UNUSED_ARG(executable_offset); + jump->addr = jump_addr; if (jump->flags & SLJIT_REWRITABLE_JUMP) goto exit; @@ -426,20 +427,23 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = jump->u.target; else { SLJIT_ASSERT(jump->u.label != NULL); - target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); + + if (jump->u.label->size > orig_addr) + jump_addr = (sljit_uw)(code + orig_addr); } - diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (jump->flags & IS_COND) { diff += SSIZE_OF(ins); if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) { - inst--; - inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000; + code_ptr--; + code_ptr[0] = (code_ptr[0] & 0xfc0003ff) ^ 0x4000000; jump->flags |= PATCH_B; - jump->addr = (sljit_uw)inst; - return inst; + jump->addr = (sljit_uw)code_ptr; + return code_ptr; } diff -= SSIZE_OF(ins); @@ -447,60 +451,65 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (diff >= JUMP_MIN && diff <= JUMP_MAX) { if (jump->flags & IS_COND) { - inst[-1] |= (sljit_ins)IMM_I16(2); + code_ptr[-1] |= (sljit_ins)IMM_I16(2); } jump->flags |= PATCH_J; - return inst; + return code_ptr; } if (diff >= S32_MIN && diff <= S32_MAX) { if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(3); + code_ptr[-1] |= (sljit_ins)IMM_I16(3); jump->flags |= PATCH_REL32; - inst[1] = inst[0]; - return inst + 1; + code_ptr[1] = code_ptr[0]; + return code_ptr + 1; } if (target_addr <= (sljit_uw)S32_MAX) { if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(3); + code_ptr[-1] |= (sljit_ins)IMM_I16(3); jump->flags |= PATCH_ABS32; - inst[1] = inst[0]; - return inst + 1; + code_ptr[1] = code_ptr[0]; + return code_ptr + 1; } if (target_addr <= S52_MAX) { if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(4); + code_ptr[-1] |= (sljit_ins)IMM_I16(4); jump->flags |= PATCH_ABS52; - inst[2] = inst[0]; - return inst + 2; + code_ptr[2] = code_ptr[0]; + return code_ptr + 2; } exit: if (jump->flags & IS_COND) - inst[-1] |= (sljit_ins)IMM_I16(5); - inst[3] = inst[0]; - return inst + 3; + code_ptr[-1] |= (sljit_ins)IMM_I16(5); + code_ptr[3] = code_ptr[0]; + return code_ptr + 3; } static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_uw addr; + sljit_uw jump_addr = (sljit_uw)code_ptr; sljit_sw diff; SLJIT_UNUSED_ARG(executable_offset); SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT)); if (jump->flags & JUMP_ADDR) addr = jump->u.target; - else + else { addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset); - diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + if (jump->u.label->size > jump->addr) + jump_addr = (sljit_uw)(code + jump->addr); + } + + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset); if (diff >= S32_MIN && diff <= S32_MAX) { SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT)); @@ -623,6 +632,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) } else { /* Unit size: instruction. */ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins))) total_size = 0; @@ -641,6 +654,10 @@ static void reduce_code_size(struct sljit_compiler *compiler) if (!(jump->flags & JUMP_ADDR)) { /* Real size minus 1. Unit size: instruction. */ diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr; + if (jump->u.label->size > jump->addr) { + SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr); + diff -= (sljit_sw)size_reduce; + } if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins))) total_size = 1; @@ -716,8 +733,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (next_min_addr == next_jump_addr) { if (!(jump->flags & JUMP_MOV_ADDR)) { word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT); - jump->addr = (sljit_uw)code_ptr; - code_ptr = detect_jump_type(jump, code, executable_offset); + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins))); } else { word_count += jump->flags >> JUMP_SIZE_SHIFT;