Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#3044: AArch64 SVE2 codec: add vector+scalar versions of st/ldnt #6468

Merged
merged 4 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 98 additions & 55 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -7950,6 +7950,99 @@ memory_transfer_size_from_dtype(uint enc)
return opnd_size_from_bytes((1 << insz) * elements);
}

/*
 * Shared decoder for the SVE vector-base plus GPR-offset memory operand
 * [<Zn>.S/D{, <Xm>}].
 *
 * size_bit: position of the encoding bit that selects the element size.
 *           When size_bit is 22 a set bit selects SINGLE_REG; for any other
 *           value (callers in this file pass 30) a clear bit selects
 *           SINGLE_REG. The opposite bit value selects DOUBLE_REG.
 * enc:      the instruction encoding being decoded.
 * opcode:   not used by this helper.
 * pc:       not used by this helper.
 * opnd:     receives the decoded memory operand.
 *
 * Always returns true.
 */
static inline bool
decode_svemem_vec_sd_gpr16(uint size_bit, uint enc, int opcode, byte *pc,
                           OUT opnd_t *opnd)
{
    /* Value of the selector bit that denotes a single-word (.S) element. */
    const uint s_marker = (size_bit == 22) ? 1 : 0;

    aarch64_reg_offset elsz;
    if (BITS(enc, size_bit, size_bit) == s_marker)
        elsz = SINGLE_REG;
    else
        elsz = DOUBLE_REG;

    /* Bits 24:23 hold msz; each element accesses (1 << msz) bytes, so the total
     * transfer is that multiplied by the number of elements in the vector.
     */
    const aarch64_reg_offset msz = BITS(enc, 24, 23);
    const uint bytes_per_element = 1 << msz;
    const opnd_size_t transfer_size =
        opnd_size_from_bytes(bytes_per_element * get_elements_in_sve_vector(elsz));

    /* Vector base register from the field at bit 5. */
    const reg_id_t base_z = decode_vreg(Z_REG, extract_uint(enc, 5, 5));
    ASSERT(reg_is_z(base_z));

    /* Scalar offset register from the field at bit 16. */
    const reg_id_t offset_x =
        decode_reg(extract_uint(enc, 16, 5), true, false /* XZR */);
    ASSERT(reg_is_gpr(offset_x));

    const opnd_size_t element_opsz = get_opnd_size_from_offset(elsz);
    *opnd = opnd_create_vector_base_disp_aarch64(base_z, offset_x, element_opsz,
                                                 DR_EXTEND_UXTX, false, 0, 0,
                                                 transfer_size, 0);
    return true;
}

/*
 * Shared encoder for the SVE vector-base plus GPR-offset memory operand
 * [<Zn>.S/D{, <Xm>}].
 *
 * size_bit: position of the encoding bit that selects the element size.
 *           When size_bit is 22 a set bit selects SINGLE_REG; for any other
 *           value (callers in this file pass 30) a clear bit selects
 *           SINGLE_REG. The opposite bit value selects DOUBLE_REG.
 * enc:      the encoding template; the element size and msz (bits 24:23) are
 *           read from it.
 * opcode:   not used by this helper.
 * pc:       not used by this helper.
 * opnd:     the memory operand to encode.
 * enc_out:  on success the index register number is ORed in at bit 16 and the
 *           vector base register number at bit 5.
 *
 * Returns false, leaving *enc_out untouched, when the operand does not match
 * the form and sizes implied by enc; returns true otherwise.
 */
static inline bool
encode_svemem_vec_sd_gpr16(uint size_bit, uint enc, int opcode, byte *pc, opnd_t opnd,
                           OUT uint *enc_out)
{
    /* Value of the selector bit that denotes a single-word (.S) element. */
    const uint s_marker = (size_bit == 22) ? 1 : 0;

    /* Element size is a part of the constant bits. */
    const aarch64_reg_offset elsz =
        (BITS(enc, size_bit, size_bit) == s_marker) ? SINGLE_REG : DOUBLE_REG;

    /* The operand must be a base+index memory reference whose vector element
     * size agrees with the encoding.
     */
    if (!opnd_is_base_disp(opnd))
        return false;
    if (opnd_get_index(opnd) == DR_REG_NULL)
        return false;
    if (get_vector_element_reg_offset(opnd) != elsz)
        return false;

    /* Only an unscaled UXTX index is accepted. */
    bool scaled;
    uint scale_amount;
    if (opnd_get_index_extend(opnd, &scaled, &scale_amount) != DR_EXTEND_UXTX)
        return false;
    if (scaled || scale_amount != 0)
        return false;

    uint z_num;
    opnd_size_t z_size = OPSZ_SCALABLE;
    IF_RETURN_FALSE(!encode_vreg(&z_size, &z_num, opnd_get_base(opnd)))

    /* The operand's size must equal the transfer size implied by msz and the
     * element count.
     */
    const aarch64_reg_offset msz = BITS(enc, 24, 23);
    const uint bytes_per_element = 1 << msz;
    const opnd_size_t expected_size =
        opnd_size_from_bytes(bytes_per_element * get_elements_in_sve_vector(elsz));
    IF_RETURN_FALSE(opnd_get_size(opnd) != expected_size)

    uint x_num;
    bool index_is_x = false;
    IF_RETURN_FALSE(!encode_reg(&x_num, &index_is_x, opnd_get_index(opnd), false) ||
                    !index_is_x)

    *enc_out |= (z_num << 5) | (x_num << 16);
    return true;
}

/*
 * svemem_vec_22sd_gpr16: SVE memory address with GPR offset [<Zn>.S/D{, <Xm>}],
 * size determined by bit 22
 */

/*
 * Decoder for the svemem_vec_22sd_gpr16 operand: forwards to
 * decode_svemem_vec_sd_gpr16() with 22 as the element-size selector bit and
 * returns that helper's result.
 */
static inline bool
decode_opnd_svemem_vec_22sd_gpr16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
return decode_svemem_vec_sd_gpr16(22, enc, opcode, pc, opnd);
}

/*
 * Encoder for the svemem_vec_22sd_gpr16 operand: forwards to
 * encode_svemem_vec_sd_gpr16() with 22 as the element-size selector bit and
 * returns that helper's result.
 */
static inline bool
encode_opnd_svemem_vec_22sd_gpr16(uint enc, int opcode, byte *pc, opnd_t opnd,
OUT uint *enc_out)
{
return encode_svemem_vec_sd_gpr16(22, enc, opcode, pc, opnd, enc_out);
}

/* SVE memory operand [<Xn|SP>{, #<imm>, MUL VL}] 1 dest register */

static inline bool
Expand Down Expand Up @@ -8349,66 +8442,16 @@ encode_opnd_x16imm(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_ou
/*
 * svemem_vec_30sd_gpr16: SVE memory address with GPR offset [<Zn>.S/D{, <Xm>}],
 * size determined by bit 30
 */

static inline bool
decode_opnd_svemem_vec_sd_gpr16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
decode_opnd_svemem_vec_30sd_gpr16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
const aarch64_reg_offset msz = BITS(enc, 24, 23);
const uint scale = 1 << msz;

const aarch64_reg_offset element_size =
BITS(enc, 30, 30) > 0 ? DOUBLE_REG : SINGLE_REG;

const opnd_size_t mem_transfer =
opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size));

const reg_id_t zn = decode_vreg(Z_REG, extract_uint(enc, 5, 5));
ASSERT(reg_is_z(zn));

const reg_id_t xm = decode_reg(extract_uint(enc, 16, 5), true, false /* XZR */);
ASSERT(reg_is_gpr(xm));

*opnd = opnd_create_vector_base_disp_aarch64(
zn, xm, get_opnd_size_from_offset(element_size), DR_EXTEND_UXTX, false, 0, 0,
mem_transfer, 0);
return true;
return decode_svemem_vec_sd_gpr16(30, enc, opcode, pc, opnd);
}

static inline bool
encode_opnd_svemem_vec_sd_gpr16(uint enc, int opcode, byte *pc, opnd_t opnd,
OUT uint *enc_out)
encode_opnd_svemem_vec_30sd_gpr16(uint enc, int opcode, byte *pc, opnd_t opnd,
OUT uint *enc_out)
{
// Element size is a part of the constant bits
const aarch64_reg_offset element_size =
BITS(enc, 30, 30) > 0 ? DOUBLE_REG : SINGLE_REG;

if (!opnd_is_base_disp(opnd) || opnd_get_index(opnd) == DR_REG_NULL ||
get_vector_element_reg_offset(opnd) != element_size)
return false;

bool index_scaled;
uint index_scale_amount;
if (opnd_get_index_extend(opnd, &index_scaled, &index_scale_amount) !=
DR_EXTEND_UXTX ||
index_scaled || index_scale_amount != 0)
return false;

uint zreg_number;
opnd_size_t reg_size = OPSZ_SCALABLE;
IF_RETURN_FALSE(!encode_vreg(&reg_size, &zreg_number, opnd_get_base(opnd)))

const aarch64_reg_offset msz = BITS(enc, 24, 23);
const uint scale = 1 << msz;

const opnd_size_t mem_transfer =
opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size));
IF_RETURN_FALSE(opnd_get_size(opnd) != mem_transfer)

uint xreg_number;
bool is_x = false;
IF_RETURN_FALSE(!encode_reg(&xreg_number, &is_x, opnd_get_index(opnd), false) ||
!is_x)

*enc_out |= (xreg_number << 16) | (zreg_number << 5);
return true;
return encode_svemem_vec_sd_gpr16(30, enc, opcode, pc, opnd, enc_out);
}

/* index3: index of D subreg in Q register: 0-1 */
Expand Down
24 changes: 19 additions & 5 deletions core/ir/aarch64/codec_sve2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,18 @@
01100100101xxxxx0110x1xxxxxxxxxx n 1070 SVE2 fmlslt z_s_0 : z_s_0 z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11
01000101xx1xxxxx110xxxxxxxxxxxxx n 1145 SVE2 histcnt z_size_sd_0 : p10_zer_lo z_size_sd_5 z_size_sd_16
01000101001xxxxx101000xxxxxxxxxx n 1071 SVE2 histseg z_b_0 : z_b_5 z_b_16
11000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_d_0 : svemem_vec_sd_gpr16 p10_zer_lo
10000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_s_0 : svemem_vec_sd_gpr16 p10_zer_lo
11000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_d_0 : svemem_vec_sd_gpr16 p10_zer_lo
10000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_s_0 : svemem_vec_sd_gpr16 p10_zer_lo
11000101000xxxxx100xxxxxxxxxxxxx n 1188 SVE2 ldnt1sw z_d_0 : svemem_vec_sd_gpr16 p10_zer_lo
11000100000xxxxx110xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo
10000100000xxxxx101xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo
11000101100xxxxx110xxxxxxxxxxxxx n 992 SVE2 ldnt1d z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo
11000100100xxxxx110xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo
10000100100xxxxx101xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo
11000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo
10000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo
11000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo
10000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo
11000101000xxxxx100xxxxxxxxxxxxx n 1188 SVE2 ldnt1sw z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo
11000101000xxxxx110xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo
10000101000xxxxx101xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo
01000101xx1xxxxx100xxxxxxxx0xxxx w 1189 SVE2 match p_size_bh_0 : p10_zer_lo z_size_bh_5 z_size_bh_16
00000100111xxxxx001111xxxxxxxxxx n 1072 SVE2 nbsl z_d_0 : z_d_0 z_d_16 z_d_5
01000101xx1xxxxx100xxxxxxxx1xxxx w 1190 SVE2 nmatch p_size_bh_0 : p10_zer_lo z_size_bh_5 z_size_bh_16
Expand Down Expand Up @@ -223,6 +230,13 @@
01000101xx0xxxxx100011xxxxxxxxxx n 1116 SVE2 ssubltb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16
01000101xx0xxxxx010100xxxxxxxxxx n 1117 SVE2 ssubwb z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16
01000101xx0xxxxx010101xxxxxxxxxx n 1118 SVE2 ssubwt z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16
11100100000xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_d_0 p10_lo
11100100010xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_s_0 p10_lo
11100101100xxxxx001xxxxxxxxxxxxx n 1004 SVE2 stnt1d svemem_vec_30sd_gpr16 : z_d_0 p10_lo
11100100100xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_d_0 p10_lo
11100100110xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_s_0 p10_lo
11100101000xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_d_0 p10_lo
11100101010xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_s_0 p10_lo
01000101xx1xxxxx011100xxxxxxxxxx n 1119 SVE2 subhnb z_sizep1_bhs_0 : z_size_hsd_5 z_size_hsd_16
01000101xx1xxxxx011101xxxxxxxxxx n 1120 SVE2 subhnt z_sizep1_bhs_0 : z_sizep1_bhs_0 z_size_hsd_5 z_size_hsd_16
01000100xx011100100xxxxxxxxxxxxx n 474 SVE2 suqadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5
Expand Down
Loading
Loading