Skip to content

Commit

Permalink
pulley: Implement interpreter-to-host calls
Browse files Browse the repository at this point in the history
This commit is an initial stab at implementing interpreter-to-host
communication in Pulley. The basic problem is that Pulley needs the
ability to call back into Wasmtime to implement tasks such as
`memory.grow`, imported functions, etc. For native platforms this is a
simple `call_indirect` operation in Cranelift but the story for Pulley
must be different because it's effectively switching from interpreted
code to native code.

The initial idea for this in bytecodealliance#9651 is replaced here; the new
design looks mostly similar but with a few changes. The overall structure of
how this works is:

* A new `call_indirect_host` opcode is added to Pulley.
  * Function signatures that can be called from Pulley bytecode are
    statically enumerated at build-time.
  * This enables the implementation of `call_indirect_host` to take an
    immediate of which signature is being used and cast the function
    pointer to the right type.
* A new pulley-specific relocation is added to Cranelift for this opcode.
  * `RelocDistance::Far` calls to a name trigger the use of
    `call_indirect_host`.
  * The relocation is filled in by Wasmtime after compilation where the
    signature number is inserted.
  * A new `NS_*` value for user-function namespaces is reserved in
    `wasmtime-cranelift` for this new namespace of functions.
* Code generation for Pulley in `wasmtime-cranelift` now has
  Pulley-specific handling of the wasm-to-host transition where all
  previous `call_indirect` instructions are replaced with a call to a
  "backend intrinsic" which gets lowered to a `call_indirect_host`.

Note that most of this still isn't hooked up everywhere in Wasmtime.
That means that the testing here is pretty light at this time. It'll
require a fair bit more work to get everything fully integrated from
Wasmtime in Pulley. This is expected to be one of the significant
remaining chunks of work and should help unblock future testing (or make
those diffs smaller ideally).
  • Loading branch information
alexcrichton committed Nov 22, 2024
1 parent 60ab850 commit 0ea4d86
Show file tree
Hide file tree
Showing 20 changed files with 400 additions and 61 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions cranelift/codegen/src/binemit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ pub enum Reloc {
S390xTlsGd64,
/// s390x TLS GDCall - marker to enable optimization of TLS calls
S390xTlsGdCall,

/// Pulley - call a host function indirectly where the embedder resolving
/// this relocation needs to fill in the expected signature.
PulleyCallIndirectHost,
}

impl fmt::Display for Reloc {
Expand Down Expand Up @@ -152,6 +156,7 @@ impl fmt::Display for Reloc {
Self::Aarch64Ld64GotLo12Nc => write!(f, "Aarch64AdrGotLo12Nc"),
Self::S390xTlsGd64 => write!(f, "TlsGd64"),
Self::S390xTlsGdCall => write!(f, "TlsGdCall"),
Self::PulleyCallIndirectHost => write!(f, "PulleyCallIndirectHost"),
}
}
}
Expand Down
51 changes: 22 additions & 29 deletions cranelift/codegen/src/isa/pulley_shared/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,36 +541,29 @@ where
insts
}

fn gen_call(dest: &CallDest, tmp: Writable<Reg>, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> {
if info.callee_conv == isa::CallConv::Tail || info.callee_conv == isa::CallConv::Fast {
match &dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => smallvec![Inst::Call {
info: Box::new(info.map(|()| name.clone()))
}
.into()],
&CallDest::ExtName(ref name, RelocDistance::Far) => smallvec![
Inst::LoadExtName {
dst: WritableXReg::try_from(tmp).unwrap(),
name: Box::new(name.clone()),
offset: 0,
}
.into(),
Inst::IndirectCall {
info: Box::new(info.map(|()| XReg::new(tmp.to_reg()).unwrap()))
}
.into(),
],
&CallDest::Reg(reg) => smallvec![Inst::IndirectCall {
info: Box::new(info.map(|()| XReg::new(*reg).unwrap()))
}
.into()],
fn gen_call(
dest: &CallDest,
_tmp: Writable<Reg>,
info: CallInfo<()>,
) -> SmallVec<[Self::I; 2]> {
match dest {
// "near" calls are pulley->pulley calls so they use a normal "call"
// opcode
CallDest::ExtName(name, RelocDistance::Near) => smallvec![Inst::Call {
info: Box::new(info.map(|()| name.clone()))
}
} else {
todo!(
"host calls? callee_conv = {:?}; caller_conv = {:?}",
info.callee_conv,
info.caller_conv,
)
.into()],
// "far" calls are pulley->host calls so they use a different opcode
// which is lowered with a special relocation in the backend.
CallDest::ExtName(name, RelocDistance::Far) => smallvec![Inst::IndirectCallHost {
info: Box::new(info.map(|()| name.clone()))
}
.into()],
// Indirect calls are all assumed to be pulley->pulley calls
CallDest::Reg(reg) => smallvec![Inst::IndirectCall {
info: Box::new(info.map(|()| XReg::new(*reg).unwrap()))
}
.into()],
}
}

Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
;; An indirect call to an unknown callee.
(IndirectCall (info BoxCallIndInfo))

;; An indirect call out to a host-defined function. The host function
;; pointer is the first "argument" of this function call.
(IndirectCallHost (info BoxCallInfo))

;; Unconditional jumps.
(Jump (label MachLabel))

Expand Down
17 changes: 17 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,23 @@ fn pulley_emit<P>(

Inst::IndirectCall { .. } => todo!(),

Inst::IndirectCallHost { info } => {
// Emit a relocation to fill in the actual immediate argument here
// in `call_indirect_host`.
sink.add_reloc(Reloc::PulleyCallIndirectHost, &info.dest, 0);
enc::call_indirect_host(sink, 0_u8);

if let Some(s) = state.take_stack_map() {
let offset = sink.cur_offset();
sink.push_user_stack_map(state, offset, s);
}
sink.add_call_site();

// If a callee pop is happening here that means that something has
// messed up, these are expected to be "very simple" signatures.
assert!(info.callee_pop_size == 0);
}

Inst::Jump { label } => {
sink.use_label_at_offset(start_offset + 1, *label, LabelUse::Jump(1));
sink.add_uncond_branch(start_offset, start_offset + 5, *label);
Expand Down
6 changes: 5 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
collector.reg_def(dst);
}

Inst::Call { info } => {
Inst::Call { info } | Inst::IndirectCallHost { info } => {
let CallInfo { uses, defs, .. } = &mut **info;
for CallArgPair { vreg, preg } in uses {
collector.reg_fixed_use(vreg, *preg);
Expand Down Expand Up @@ -582,6 +582,10 @@ impl Inst {
format!("indirect_call {callee}, {info:?}")
}

Inst::IndirectCallHost { info } => {
format!("indirect_call_host {info:?}")
}

Inst::Jump { label } => format!("jump {}", label.to_string()),

Inst::BrIf {
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/machinst/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2037,6 +2037,10 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
self.force_veneers = ForceVeneers::Yes;
}

fn write(&mut self, offset: u64, data: &[u8]) {
    // Overwrite already-emitted bytes in place, starting at `offset`.
    //
    // Panics if `offset` does not fit in the index type or if the
    // destination range runs past the end of the buffered data.
    let start: usize = offset.try_into().unwrap();
    let dest = &mut self.buf.data[start..][..data.len()];
    dest.copy_from_slice(data);
}

fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec<u8> {
// Double-check all functions were pushed.
assert_eq!(self.next_func, self.buf.label_offsets.len());
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/machinst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,10 @@ pub trait TextSectionBuilder {
/// A debug-only option which is used to force veneers to be used.
///
/// NOTE(review): the original sentence was truncated after "used to for";
/// the ending here is reconstructed from the method name — confirm.
fn force_veneers(&mut self);

/// Write the `data` provided at `offset`, for example when resolving a
/// relocation.
///
/// `offset` is a byte offset into the text section being built and `data`
/// is the sequence of bytes to store starting at that offset.
fn write(&mut self, offset: u64, data: &[u8]);

/// Completes this text section, filling out any final details, and returns
/// the bytes of the text section.
fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec<u8>;
Expand Down
37 changes: 37 additions & 0 deletions cranelift/filetests/filetests/isa/pulley64/call_indirect_host.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
test compile precise-output
target pulley64

function %call_indirect_host() {
fn0 = u10:0() system_v
block0:
call fn0()
return
}

; VCode:
; x30 = xconst8 -16
; x27 = xadd32 x27, x30
; store64 sp+8, x28 // flags = notrap aligned
; store64 sp+0, x29 // flags = notrap aligned
; x29 = xmov x27
; block0:
; indirect_call_host CallInfo { dest: User(userextname0), uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65279, 4294967295, 0] }, callee_conv: SystemV, caller_conv: Fast, callee_pop_size: 0 }
; x28 = load64_u sp+8 // flags = notrap aligned
; x29 = load64_u sp+0 // flags = notrap aligned
; x30 = xconst8 16
; x27 = xadd32 x27, x30
; ret
;
; Disassembled:
; 0: 14 1e f0 xconst8 spilltmp0, -16
; 3: 18 7b 7b xadd32 sp, sp, spilltmp0
; 6: 32 1b 08 1c store64_offset8 sp, 8, lr
; a: 30 1b 1d store64 sp, fp
; d: 11 1d 1b xmov fp, sp
; 10: 43 03 00 00 call_indirect_host 0
; 14: 2b 1c 1b 08 load64_offset8 lr, sp, 8
; 18: 28 1d 1b load64 fp, sp
; 1b: 14 1e 10 xconst8 spilltmp0, 16
; 1e: 18 7b 7b xadd32 sp, sp, spilltmp0
; 21: 00 ret

3 changes: 2 additions & 1 deletion crates/cranelift/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ thiserror = { workspace = true }
cfg-if = { workspace = true }
wasmtime-versioned-export-macros = { workspace = true }
itertools = "0.12"
pulley-interpreter = { workspace = true, optional = true }

[features]
all-arch = ["cranelift-codegen/all-arch"]
host-arch = ["cranelift-codegen/host-arch"]
pulley = ["cranelift-codegen/pulley"]
pulley = ["cranelift-codegen/pulley", "dep:pulley-interpreter"]
trace-log = ["cranelift-codegen/trace-log"]
component-model = ["wasmtime-environ/component-model"]
incremental-cache = ["cranelift-codegen/incremental-cache"]
Expand Down
106 changes: 102 additions & 4 deletions crates/cranelift/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,105 @@ impl Compiler {
wmemcheck,
}
}

/// Perform an indirect call from Cranelift-generated code to native code in
/// Wasmtime itself.
///
/// For native platforms this is a simple `call_indirect` instruction but
/// for the Pulley backend this is special as it's transitioning from
/// Cranelift-generated bytecode to native code on the host. That requires a
/// special opcode in the interpreter and is modeled slightly differently in
/// Cranelift IR.
///
/// `sig` is the signature of the host function, `addr` is the value holding
/// the address to call, and `args` are the arguments of the call. Returns
/// the call instruction that was inserted through `builder`.
///
/// # Panics
///
/// Panics if `sig` does not use the ISA's default calling convention. When
/// the `pulley` feature is enabled this also panics (via `unimplemented!`)
/// if no signature listed by `for_each_host_signature!` matches `sig`.
fn call_indirect_host(
&self,
builder: &mut FunctionBuilder<'_>,
sig: ir::SigRef,
addr: Value,
args: &[Value],
) -> ir::Inst {
let signature = &builder.func.dfg.signatures[sig];

// When calling the host we should always be using the platform's
// default calling convention since it'll be calling Rust code in
// Wasmtime itself.
assert_eq!(signature.call_conv, self.isa.default_call_conv());

#[cfg(feature = "pulley")]
{
use cranelift_codegen::ir::types::{I32, I64, I8};

// If pulley is enabled, even if we're not targeting it, determine
// what pulley signature that the input `signature` maps to. This is
// done to ensure that even on native platforms we've always got a
// signature listed in pulley for all platform intrinsics. In theory
// the set of signatures here doesn't change over time all that
// much. If a new signature is added then the `pulley/src/lib.rs`
// file and the `for_each_host_signature!` macro need to be updated.
// In theory that's all that needs to happen as well...
macro_rules! pulley_signum {
($(fn($($args:ident),*) $(-> $ret:ident)?;)*) => {'outer: {

// `ret` is the index of the candidate signature currently being
// compared; it is bumped once per listed signature, so the value
// yielded by `break 'outer` is the position of the first match.
let mut ret = 0;

$(
let mut params = signature.params.iter().map(|p| p.value_type);
let mut results = signature.returns.iter().map(|p| p.value_type);
// The leading `true` lets every expanded `&& ...` clause be
// appended uniformly. The trailing `is_none()` checks require the
// parameter and result lists to match exactly, with no extras.
if true
$(&& params.next() == Some($args))*
&& params.next().is_none()
$(&& results.next() == Some($ret))?
&& results.next().is_none()
{
break 'outer ret;
}
ret += 1;
)*

// Reads `ret` one last time after the final increment (presumably
// to avoid an unused-assignment warning — confirm).
let _ = ret;
unimplemented!("no pulley host signature found for {signature:?}");
}};
}

let pulley_signum = pulley_interpreter::for_each_host_signature!(pulley_signum);

let is_pulley = match self.isa.triple().architecture {
target_lexicon::Architecture::Pulley32 => true,
target_lexicon::Architecture::Pulley64 => true,
_ => false,
};

// If this target is actually pulley then a custom `call`
// instruction is emitted. This will generate a new function with
// the Cranelift-name of a "backend intrinsic" which is how the
// Pulley backend models this special opcode that doesn't otherwise
// map into the Cranelift set of opcodes.
if is_pulley {
// The callee address is passed as an extra leading pointer-sized
// parameter, so the imported signature is the original one with
// that parameter inserted at the front.
let mut new_signature = signature.clone();
new_signature
.params
.insert(0, ir::AbiParam::new(self.isa.pointer_type()));
let new_sig = builder.func.import_signature(new_signature);
// The signature number computed above is carried in the `index`
// of the user external name.
let name = ir::ExternalName::User(builder.func.declare_imported_user_function(
ir::UserExternalName {
namespace: crate::NS_PULLEY_HOSTCALL,
index: pulley_signum,
},
));
let func = builder.func.import_function(ir::ExtFuncData {
name,
signature: new_sig,
// This is the signal that a special `call_indirect_host`
// opcode is used to jump from pulley to the host.
colocated: false,
});
// `addr` goes first to line up with the parameter inserted above.
let mut raw_args = vec![addr];
raw_args.extend_from_slice(args);
return builder.ins().call(func, &raw_args);
}
}

// Non-pulley targets (or builds without the `pulley` feature) use a plain
// indirect call.
builder.ins().call_indirect(sig, addr, args)
}
}

impl wasmtime_environ::Compiler for Compiler {
Expand Down Expand Up @@ -360,7 +459,8 @@ impl wasmtime_environ::Compiler for Compiler {

// Do an indirect call to the callee.
let callee_signature = builder.func.import_signature(array_call_sig);
builder.ins().call_indirect(
self.call_indirect_host(
&mut builder,
callee_signature,
callee,
&[callee_vmctx, caller_vmctx, args_base, args_len],
Expand Down Expand Up @@ -557,9 +657,7 @@ impl wasmtime_environ::Compiler for Compiler {
// all the same results as the libcall.
let block_params = builder.block_params(block0).to_vec();
let host_sig = builder.func.import_signature(host_sig);
let call = builder
.ins()
.call_indirect(host_sig, func_addr, &block_params);
let call = self.call_indirect_host(&mut builder, host_sig, func_addr, &block_params);
let results = builder.func.dfg.inst_results(call).to_vec();
builder.ins().return_(&results);
builder.finalize();
Expand Down
Loading

0 comments on commit 0ea4d86

Please sign in to comment.