diff --git a/Cargo.lock b/Cargo.lock index d287d9846d3a..aff0f4286d0a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4148,6 +4148,7 @@ dependencies = [ "itertools 0.12.1", "log", "object", + "pulley-interpreter", "smallvec", "target-lexicon", "thiserror", diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs index edd4d7d7c904..300369cf7dc1 100644 --- a/cranelift/codegen/src/binemit/mod.rs +++ b/cranelift/codegen/src/binemit/mod.rs @@ -119,6 +119,10 @@ pub enum Reloc { S390xTlsGd64, /// s390x TLS GDCall - marker to enable optimization of TLS calls S390xTlsGdCall, + + /// Pulley - call a host function indirectly where the embedder resolving + /// this relocation needs to fill in the expected signature. + PulleyCallIndirectHost, } impl fmt::Display for Reloc { @@ -152,6 +156,7 @@ impl fmt::Display for Reloc { Self::Aarch64Ld64GotLo12Nc => write!(f, "Aarch64AdrGotLo12Nc"), Self::S390xTlsGd64 => write!(f, "TlsGd64"), Self::S390xTlsGdCall => write!(f, "TlsGdCall"), + Self::PulleyCallIndirectHost => write!(f, "PulleyCallIndirectHost"), } } } diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index 7058c0399568..1f014ff490b9 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -541,36 +541,29 @@ where insts } - fn gen_call(dest: &CallDest, tmp: Writable, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> { - if info.callee_conv == isa::CallConv::Tail || info.callee_conv == isa::CallConv::Fast { - match &dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => smallvec![Inst::Call { - info: Box::new(info.map(|()| name.clone())) - } - .into()], - &CallDest::ExtName(ref name, RelocDistance::Far) => smallvec![ - Inst::LoadExtName { - dst: WritableXReg::try_from(tmp).unwrap(), - name: Box::new(name.clone()), - offset: 0, - } - .into(), - Inst::IndirectCall { - info: Box::new(info.map(|()| XReg::new(tmp.to_reg()).unwrap())) - 
} - .into(), - ], - &CallDest::Reg(reg) => smallvec![Inst::IndirectCall { - info: Box::new(info.map(|()| XReg::new(*reg).unwrap())) - } - .into()], + fn gen_call( + dest: &CallDest, + _tmp: Writable, + info: CallInfo<()>, + ) -> SmallVec<[Self::I; 2]> { + match dest { + // "near" calls are pulley->pulley calls so they use a normal "call" + // opcode + CallDest::ExtName(name, RelocDistance::Near) => smallvec![Inst::Call { + info: Box::new(info.map(|()| name.clone())) } - } else { - todo!( - "host calls? callee_conv = {:?}; caller_conv = {:?}", - info.callee_conv, - info.caller_conv, - ) + .into()], + // "far" calls are pulley->host calls so they use a different opcode + // which is lowered with a special relocation in the backend. + CallDest::ExtName(name, RelocDistance::Far) => smallvec![Inst::IndirectCallHost { + info: Box::new(info.map(|()| name.clone())) + } + .into()], + // Indirect calls are all assumed to be pulley->pulley calls + CallDest::Reg(reg) => smallvec![Inst::IndirectCall { + info: Box::new(info.map(|()| XReg::new(*reg).unwrap())) + } + .into()], } } diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index bdce4929d95e..59b0ea7ceb70 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -49,6 +49,10 @@ ;; An indirect call to an unknown callee. (IndirectCall (info BoxCallIndInfo)) + ;; An indirect call out to a host-defined function. The host function + ;; pointer is the first "argument" of this function call. + (IndirectCallHost (info BoxCallInfo)) + ;; Unconditional jumps. (Jump (label MachLabel)) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 3772c127955b..1474bdfd08d6 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -217,6 +217,23 @@ fn pulley_emit

( Inst::IndirectCall { .. } => todo!(), + Inst::IndirectCallHost { info } => { + // Emit a relocation to fill in the actual immediate argument here + // in `call_indirect_host`. + sink.add_reloc(Reloc::PulleyCallIndirectHost, &info.dest, 0); + enc::call_indirect_host(sink, 0_u8); + + if let Some(s) = state.take_stack_map() { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + sink.add_call_site(); + + // If a callee pop is happening here that means that something has + // messed up, these are expected to be "very simple" signatures. + assert!(info.callee_pop_size == 0); + } + Inst::Jump { label } => { sink.use_label_at_offset(start_offset + 1, *label, LabelUse::Jump(1)); sink.add_uncond_branch(start_offset, start_offset + 5, *label); diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 0e8d3f346ce4..0ae00d878846 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -91,7 +91,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_def(dst); } - Inst::Call { info } => { + Inst::Call { info } | Inst::IndirectCallHost { info } => { let CallInfo { uses, defs, .. 
} = &mut **info; for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); @@ -582,6 +582,10 @@ impl Inst { format!("indirect_call {callee}, {info:?}") } + Inst::IndirectCallHost { info } => { + format!("indirect_call_host {info:?}") + } + Inst::Jump { label } => format!("jump {}", label.to_string()), Inst::BrIf { diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 0c29d79f7da0..c9beb52b8012 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -2037,6 +2037,10 @@ impl TextSectionBuilder for MachTextSectionBuilder { self.force_veneers = ForceVeneers::Yes; } + fn write(&mut self, offset: u64, data: &[u8]) { + self.buf.data[offset.try_into().unwrap()..][..data.len()].copy_from_slice(data); + } + fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec { // Double-check all functions were pushed. assert_eq!(self.next_func, self.buf.label_offsets.len()); diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 6a68728820e2..ed07b1bf0365 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -554,6 +554,10 @@ pub trait TextSectionBuilder { /// A debug-only option which is used to for fn force_veneers(&mut self); + /// Write the `data` provided at `offset`, for example when resolving a + /// relocation. + fn write(&mut self, offset: u64, data: &[u8]); + /// Completes this text section, filling out any final details, and returns /// the bytes of the text section. 
fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec; diff --git a/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif b/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif new file mode 100644 index 000000000000..4592004936bb --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif @@ -0,0 +1,37 @@ +test compile precise-output +target pulley64 + +function %call_indirect_host() { + fn0 = u10:0() system_v +block0: + call fn0() + return +} + +; VCode: +; x30 = xconst8 -16 +; x27 = xadd32 x27, x30 +; store64 sp+8, x28 // flags = notrap aligned +; store64 sp+0, x29 // flags = notrap aligned +; x29 = xmov x27 +; block0: +; indirect_call_host CallInfo { dest: User(userextname0), uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: SystemV, caller_conv: Fast, callee_pop_size: 0 } +; x28 = load64_u sp+8 // flags = notrap aligned +; x29 = load64_u sp+0 // flags = notrap aligned +; x30 = xconst8 16 +; x27 = xadd32 x27, x30 +; ret +; +; Disassembled: +; xconst8 spilltmp0, -16 +; xadd32 sp, sp, spilltmp0 +; store64_offset8 sp, 8, lr +; store64 sp, fp +; xmov fp, sp +; call_indirect_host 0 +; load64_offset8 lr, sp, 8 +; load64 fp, sp +; xconst8 spilltmp0, 16 +; xadd32 sp, sp, spilltmp0 +; ret + diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml index 678457c35eca..635a4eec91e8 100644 --- a/crates/cranelift/Cargo.toml +++ b/crates/cranelift/Cargo.toml @@ -32,11 +32,12 @@ thiserror = { workspace = true } cfg-if = { workspace = true } wasmtime-versioned-export-macros = { workspace = true } itertools = "0.12" +pulley-interpreter = { workspace = true, optional = true } [features] all-arch = ["cranelift-codegen/all-arch"] host-arch = ["cranelift-codegen/host-arch"] -pulley = ["cranelift-codegen/pulley"] +pulley = ["cranelift-codegen/pulley", "dep:pulley-interpreter"] trace-log = ["cranelift-codegen/trace-log"] component-model = 
["wasmtime-environ/component-model"] incremental-cache = ["cranelift-codegen/incremental-cache"] diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 36e1b83d76ce..29e959faf0da 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -119,6 +119,105 @@ impl Compiler { wmemcheck, } } + + /// Perform an indirect call from Cranelift-generated code to native code in + /// Wasmtime itself. + /// + /// For native platforms this is a simple `call_indirect` instruction but + /// for the Pulley backend this is special as it's transitioning from + /// Cranelift-generated bytecode to native code on the host. That requires a + /// special opcode in the interpreter and is modeled slightly differently in + /// Cranelift IR. + fn call_indirect_host( + &self, + builder: &mut FunctionBuilder<'_>, + sig: ir::SigRef, + addr: Value, + args: &[Value], + ) -> ir::Inst { + let signature = &builder.func.dfg.signatures[sig]; + + // When calling the host we should always be using the platform's + // default calling convention since it'll be calling Rust code in + // Wasmtime itself. + assert_eq!(signature.call_conv, self.isa.default_call_conv()); + + #[cfg(feature = "pulley")] + { + use cranelift_codegen::ir::types::{I32, I64, I8}; + + // If pulley is enabled, even if we're not targeting it, determine + // which pulley signature the input `signature` maps to. This is + // done to ensure that even on native platforms we've always got a + // signature listed in pulley for all platform intrinsics. In theory + // the set of signatures here doesn't change over time all that + // much. If a new signature is added then the `pulley/src/lib.rs` + // file and the `for_each_host_signature!` macro need to be updated. + // In theory that's all that needs to happen as well... + macro_rules! 
pulley_signum { + ($(fn($($args:ident),*) $(-> $ret:ident)?;)*) => {'outer: { + + let mut ret = 0; + + $( + let mut params = signature.params.iter().map(|p| p.value_type); + let mut results = signature.returns.iter().map(|p| p.value_type); + if true + $(&& params.next() == Some($args))* + && params.next().is_none() + $(&& results.next() == Some($ret))? + && results.next().is_none() + { + break 'outer ret; + } + ret += 1; + )* + + let _ = ret; + unimplemented!("no pulley host signature found for {signature:?}"); + }}; + } + + let pulley_signum = pulley_interpreter::for_each_host_signature!(pulley_signum); + + let is_pulley = match self.isa.triple().architecture { + target_lexicon::Architecture::Pulley32 => true, + target_lexicon::Architecture::Pulley64 => true, + _ => false, + }; + + // If this target is actually pulley then a custom `call` + // instruction is emitted. This will generate a new function with + // the Cranelift-name of a "backend intrinsic" which is how the + // Pulley backend models this special opcode that doesn't otherwise + // map into the Cranelift set of opcodes. + if is_pulley { + let mut new_signature = signature.clone(); + new_signature + .params + .insert(0, ir::AbiParam::new(self.isa.pointer_type())); + let new_sig = builder.func.import_signature(new_signature); + let name = ir::ExternalName::User(builder.func.declare_imported_user_function( + ir::UserExternalName { + namespace: crate::NS_PULLEY_HOSTCALL, + index: pulley_signum, + }, + )); + let func = builder.func.import_function(ir::ExtFuncData { + name, + signature: new_sig, + // This is the signal that a special `call_indirect_host` + // opcode is used to jump from pulley to the host. 
+ colocated: false, + }); + let mut raw_args = vec![addr]; + raw_args.extend_from_slice(args); + return builder.ins().call(func, &raw_args); + } + } + + builder.ins().call_indirect(sig, addr, args) + } } impl wasmtime_environ::Compiler for Compiler { @@ -360,7 +459,8 @@ impl wasmtime_environ::Compiler for Compiler { // Do an indirect call to the callee. let callee_signature = builder.func.import_signature(array_call_sig); - builder.ins().call_indirect( + self.call_indirect_host( + &mut builder, callee_signature, callee, &[callee_vmctx, caller_vmctx, args_base, args_len], @@ -557,9 +657,7 @@ impl wasmtime_environ::Compiler for Compiler { // all the same results as the libcall. let block_params = builder.block_params(block0).to_vec(); let host_sig = builder.func.import_signature(host_sig); - let call = builder - .ins() - .call_indirect(host_sig, func_addr, &block_params); + let call = self.call_indirect_host(&mut builder, host_sig, func_addr, &block_params); let results = builder.func.dfg.inst_results(call).to_vec(); builder.ins().return_(&results); builder.finalize(); diff --git a/crates/cranelift/src/compiler/component.rs b/crates/cranelift/src/compiler/component.rs index cc0b5fe9ef26..8f15f003ca68 100644 --- a/crates/cranelift/src/compiler/component.rs +++ b/crates/cranelift/src/compiler/component.rs @@ -237,9 +237,8 @@ impl<'a> TrampolineCompiler<'a> { i32::try_from(self.offsets.lowering_callee(index)).unwrap(), ); let host_sig = self.builder.import_signature(host_sig); - self.builder - .ins() - .call_indirect(host_sig, host_fn, &callee_args); + self.compiler + .call_indirect_host(&mut self.builder, host_sig, host_fn, &callee_args); match self.abi { Abi::Wasm => { @@ -275,9 +274,8 @@ impl<'a> TrampolineCompiler<'a> { ir::types::I8, i64::from(wasmtime_environ::Trap::AlwaysTrapAdapter as u8), ); - self.builder - .ins() - .call_indirect(host_sig, host_fn, &[vmctx, code]); + self.compiler + .call_indirect_host(&mut self.builder, host_sig, host_fn, &[vmctx, code]); // 
debug trap in case execution actually falls through, but this // shouldn't ever get hit at runtime. self.builder.ins().trap(TRAP_INTERNAL_ASSERT); @@ -309,10 +307,9 @@ impl<'a> TrampolineCompiler<'a> { let (host_sig, offset) = host::resource_new32(self.isa, &mut self.builder.func); let host_fn = self.load_libcall(vmctx, offset); - let call = self - .builder - .ins() - .call_indirect(host_sig, host_fn, &host_args); + let call = + self.compiler + .call_indirect_host(&mut self.builder, host_sig, host_fn, &host_args); let result = self.builder.func.dfg.inst_results(call)[0]; self.abi_store_results(&[result]); } @@ -343,10 +340,9 @@ impl<'a> TrampolineCompiler<'a> { let (host_sig, offset) = host::resource_rep32(self.isa, &mut self.builder.func); let host_fn = self.load_libcall(vmctx, offset); - let call = self - .builder - .ins() - .call_indirect(host_sig, host_fn, &host_args); + let call = + self.compiler + .call_indirect_host(&mut self.builder, host_sig, host_fn, &host_args); let result = self.builder.func.dfg.inst_results(call)[0]; self.abi_store_results(&[result]); } @@ -373,10 +369,9 @@ impl<'a> TrampolineCompiler<'a> { let (host_sig, offset) = host::resource_drop(self.isa, &mut self.builder.func); let host_fn = self.load_libcall(vmctx, offset); - let call = self - .builder - .ins() - .call_indirect(host_sig, host_fn, &host_args); + let call = + self.compiler + .call_indirect_host(&mut self.builder, host_sig, host_fn, &host_args); let should_run_destructor = self.builder.func.dfg.inst_results(call)[0]; let resource_ty = self.types[resource].ty; @@ -556,10 +551,9 @@ impl<'a> TrampolineCompiler<'a> { host_args.extend(args[2..].iter().copied()); let (host_sig, offset) = get_libcall(self.isa, &mut self.builder.func); let host_fn = self.load_libcall(vmctx, offset); - let call = self - .builder - .ins() - .call_indirect(host_sig, host_fn, &host_args); + let call = + self.compiler + .call_indirect_host(&mut self.builder, host_sig, host_fn, &host_args); let results = 
self.builder.func.dfg.inst_results(call).to_vec(); self.builder.ins().return_(&results); } @@ -786,7 +780,9 @@ impl TrampolineCompiler<'_> { )); args.push(self.builder.ins().stack_addr(pointer_type, slot, 0)); } - let call = self.builder.ins().call_indirect(sig, libcall, &args); + let call = self + .compiler + .call_indirect_host(&mut self.builder, sig, libcall, &args); let mut results = self.builder.func.dfg.inst_results(call).to_vec(); if uses_retptr { results.push(self.builder.ins().load( diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index cba36b51f613..713381277cbf 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -212,6 +212,9 @@ pub const NS_WASM_FUNC: u32 = 0; /// function through an indirect function call loaded by the `VMContext`. pub const NS_WASMTIME_BUILTIN: u32 = 1; +/// Namespace of pulley->host calls; `index` is the `for_each_host_signature!` signature number. +pub const NS_PULLEY_HOSTCALL: u32 = 2; + /// A record of a relocation to perform. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Relocation { @@ -290,6 +293,7 @@ fn mach_reloc_to_reloc( NS_WASMTIME_BUILTIN => { RelocationTarget::Builtin(BuiltinFunctionIndex::from_u32(name.index)) } + NS_PULLEY_HOSTCALL => RelocationTarget::PulleyHostcall(name.index), _ => panic!("unknown namespace {}", name.namespace), } } diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs index b0fd17197fbe..e419cec4e0fe 100644 --- a/crates/cranelift/src/obj.rs +++ b/crates/cranelift/src/obj.rs @@ -133,6 +133,7 @@ impl<'a> ModuleTextBuilder<'a> { } for r in compiled_func.relocations() { + let reloc_offset = off + u64::from(r.offset); match r.reloc_target { // Relocations against user-defined functions means that this is // a relocation against a module-local function, typically a @@ -144,7 +145,7 @@ impl<'a> ModuleTextBuilder<'a> { let target = resolve_reloc_target(r.reloc_target); if self .text - .resolve_reloc(off + u64::from(r.offset), r.reloc, r.addend, target) + .resolve_reloc(reloc_offset, r.reloc, r.addend, target) { continue; } 
@@ -198,12 +199,30 @@ impl<'a> ModuleTextBuilder<'a> { object::write::Relocation { symbol, flags, - offset: off + u64::from(r.offset), + offset: reloc_offset, addend: r.addend, }, ) .unwrap(); } + + // This relocation is used to fill in which hostcall signature + // is desired within the `call_indirect_host` opcode of Pulley + // itself. The relocation target is the start of the instruction + // and the goal is to insert the static signature number, `n`, + // into the instruction. + // + // At this time the instruction is 4 bytes large (3 bytes for + // the opcode, one for the one-byte payload), so we target the + // byte at offset 3. The value `n` here should always fit within a + // byte. + // + // See the `test_call_indirect_host_width` in + // `pulley/tests/all/main.rs` for this guarantee as well. + RelocationTarget::PulleyHostcall(n) => { + let byte = u8::try_from(n).unwrap(); + self.text.write(reloc_offset + 3, &[byte]); + } }; } (symbol_id, off..off + body_len) diff --git a/crates/environ/src/compile/mod.rs b/crates/environ/src/compile/mod.rs index 59ef62e0bdba..51da11a9f672 100644 --- a/crates/environ/src/compile/mod.rs +++ b/crates/environ/src/compile/mod.rs @@ -80,6 +80,8 @@ pub enum RelocationTarget { Builtin(BuiltinFunctionIndex), /// A compiler-generated libcall. HostLibcall(obj::LibCall), + /// A pulley->host call from the interpreter. + PulleyHostcall(u32), } /// Implementation of an incremental compilation's key/value cache store. 
diff --git a/crates/wasmtime/src/compile.rs b/crates/wasmtime/src/compile.rs index 62d1accb653b..86339b44dba3 100644 --- a/crates/wasmtime/src/compile.rs +++ b/crates/wasmtime/src/compile.rs @@ -717,7 +717,7 @@ impl FunctionIndices { [&CompileKey::WASM_TO_BUILTIN_TRAMPOLINE_KIND] [&CompileKey::wasm_to_builtin_trampoline(builtin)] .unwrap_function(), - RelocationTarget::HostLibcall(_) => { + RelocationTarget::HostLibcall(_) | RelocationTarget::PulleyHostcall(_) => { unreachable!("relocation is resolved at runtime, not compile time"); } }, diff --git a/pulley/fuzz/src/interp.rs b/pulley/fuzz/src/interp.rs index 5fd70b082c16..ec703352727e 100644 --- a/pulley/fuzz/src/interp.rs +++ b/pulley/fuzz/src/interp.rs @@ -124,5 +124,6 @@ fn extended_op_is_safe_for_fuzzing(op: &ExtendedOp) -> bool { ExtendedOp::Trap(_) => true, ExtendedOp::Nop(_) => true, ExtendedOp::GetSp(GetSp { dst, .. }) => !dst.is_special(), + ExtendedOp::CallIndirectHost(_) => false, } } diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index c0817f454144..3ccbc9d88293 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1177,4 +1177,77 @@ impl ExtendedOpVisitor for Interpreter<'_> { self.state[dst].set_u64(sp); ControlFlow::Continue(()) } + + /// This instruction is sort of like a `call` instruction except that it + /// delegates to the host itself. That means that ABI details are baked in + /// here such as where various arguments are. + /// + /// This will load the arguments from the `xN` registers, the first being + /// the function pointer on the host to call. Since we don't have a + /// libffi-like solution here the way this works is that the + /// `for_each_host_signature!` macro statically enumerates all possible + /// signatures. The `sig` payload here selects one of them which we dispatch + /// to here. Note that we mostly just try to get the width of each argument + /// correct, whether or not it's a pointer is not actually tracked here. 
+ /// That means that, like the rest of Pulley, this isn't compatible with + /// strict provenance pointer rules. + fn call_indirect_host(&mut self, sig: u8) -> ControlFlow { + let raw = self.state[XReg::x0].get_ptr::(); + let mut n = 0; + let mut arg = 1; + + type I8 = i8; + type I32 = i32; + type I64 = i64; + + macro_rules! call_host { + ($(fn($($args:ident),*) $(-> $ret:ident)?;)*) => {$( + // We're relying on LLVM to boil away most of this boilerplate + // as this is a bunch of `if` statements that should be a + // `match`. + if sig == n { + union Convert { + raw: *mut u8, + f: unsafe extern "C" fn($($args),*) $(-> $ret)?, + } + let ptr = Convert { raw }.f; + + // Arguments are loaded from subsequent registers after + // `x0` and are tracked by `arg`. + let ret = ptr( + $({ + let reg = XReg::new_unchecked(arg); + arg += 1; + let reg = &self.state[reg]; + call_host!(@get $args reg) + },)* + ); + let _ = arg; // ignore the last increment of `arg` + + // If this function produces a result the ABI is that we + // place it into `x0`. + let dst = &mut self.state[XReg::x0]; + $(call_host!(@set $ret dst ret);)? + let _ = (ret, dst); // ignore if there was no return value + } + n += 1; + )*}; + + (@get I8 $reg:ident) => ($reg.get_i32() as i8); + (@get I32 $reg:ident) => ($reg.get_i32()); + (@get I64 $reg:ident) => ($reg.get_i64()); + + (@set I32 $dst:ident $val:ident) => ($dst.set_i32($val);); + (@set I64 $dst:ident $val:ident) => ($dst.set_i64($val);); + + } + + unsafe { + for_each_host_signature!(call_host); + } + + let _ = n; // ignore the last increment of `n` + + ControlFlow::Continue(()) + } } diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index b80a5646ec9b..c01120c67540 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -192,6 +192,72 @@ macro_rules! for_each_extended_op { /// Copy the special `sp` stack pointer register into an `x` register. 
get_sp = GetSp { dst: XReg }; + /// A special opcode to use an indirect function call to reenter the + /// host from the interpreter. + /// + /// This is used to implement host intrinsics such as `memory.grow` + /// for example where that needs to reenter the host from the + /// interpreter. + /// + /// The `sig` immediate here is the Nth signature in the + /// `for_each_host_signature!` macro below. The 0th "argument", in + /// register x0, is the function pointer that's being called and all + /// further arguments follow after that in registers. + call_indirect_host = CallIndirectHost { sig: u8 }; + } + }; +} + +/// All known signatures that Wasmtime needs to invoke for host functions. +/// +/// This is used in conjunction with the `call_indirect_host` opcode to jump +/// from interpreter bytecode back into the host to perform tasks such as +/// `memory.grow` or call imported host functions. +/// +/// Each function signature here corresponds to a "builtin" either for core wasm +/// or for the component model. This also includes the "array call abi" for +/// calling host functions. +/// +/// TODO: this probably needs a "pointer type" to avoid doubling the size of +/// this on 32-bit platforms. That's left for a future refactoring when it's +/// easier to start compiling everything for 32-bit platforms. That'll require +/// more of the pulley backend fleshed out and the integration with Wasmtime +/// more fleshed out as well. +#[macro_export] +macro_rules! for_each_host_signature { + ($m:ident) => { + $m! 
{ + fn(I64); + fn(I64, I32); + fn(I64, I32) -> I32; + fn(I64, I32, I32) -> I32; + fn(I64, I32, I32, I32) -> I32; + fn(I64, I32, I32, I32, I32, I32); + fn(I64, I32, I32, I32, I32) -> I32; + fn(I64, I32, I32, I32, I32, I32, I32); + fn(I64, I32, I32) -> I64; + fn(I64, I32, I32, I64, I32, I32); + fn(I64, I32, I32, I64, I64, I64); + fn(I64, I32) -> I64; + fn(I64, I32, I64, I32) -> I32; + fn(I64, I32, I64, I32, I64); + fn(I64, I32, I64, I32) -> I64; + fn(I64, I32, I64, I32, I64) -> I32; + fn(I64, I32, I64, I32, I64, I64); + fn(I64, I32, I64) -> I64; + fn(I64, I32, I64, I64, I64); + fn(I64, I32, I64, I64) -> I64; + fn(I64, I32, I64, I64, I64) -> I32; + fn(I64) -> I64; + fn(I64, I64) -> I32; + fn(I64, I64, I32) -> I64; + fn(I64, I64, I32, I64, I64, I64, I8, I64, I64); + fn(I64, I64, I64); + fn(I64, I64, I64, I64); + fn(I64, I64, I64) -> I64; + fn(I64, I64, I64, I64) -> I64; + fn(I64, I64, I64, I64, I64) -> I64; + fn(I64, I8); } }; } diff --git a/pulley/tests/all/main.rs b/pulley/tests/all/main.rs index bb35ad1c4442..efbbd9fc7e76 100644 --- a/pulley/tests/all/main.rs +++ b/pulley/tests/all/main.rs @@ -3,3 +3,13 @@ mod disas; #[cfg(feature = "interp")] mod interp; + +// Test the property relied on by `crates/cranelift/src/obj.rs` when filling in +// the `PulleyHostcall` relocation. +#[test] +fn test_call_indirect_host_width() { + let mut dst = Vec::new(); + pulley_interpreter::encode::call_indirect_host(&mut dst, 1_u8); + assert_eq!(dst.len(), 4); + assert_eq!(dst[3], 1); +}