Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wmemcheck support for additional allocation functions and granular memory #9641

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 142 additions & 20 deletions crates/cranelift/src/func_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,112 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
builder.ins().call(check_malloc, &[vmctx, retval, len]);
}

#[cfg(feature = "wmemcheck")]
fn hook_calloc_exit(&mut self, builder: &mut FunctionBuilder, retvals: &[ir::Value]) {
let check_calloc = self.builtin_functions.check_calloc(builder.func);
let vmctx = self.vmctx_val(&mut builder.cursor());
let func_args = builder
.func
.dfg
.block_params(builder.func.layout.entry_block().unwrap());
let (count, size) = if func_args.len() < 4 {
return;
} else {
(func_args[2], func_args[3])
};
let retval = if retvals.len() < 1 {
return;
} else {
retvals[0]
};
builder.ins().call(check_calloc, &[vmctx, retval, count, size]);
}

#[cfg(feature = "wmemcheck")]
fn hook_realloc_exit(&mut self, builder: &mut FunctionBuilder, retvals: &[ir::Value]) {
let check_realloc = self.builtin_functions.check_realloc(builder.func);
let vmctx = self.vmctx_val(&mut builder.cursor());
let func_args = builder
.func
.dfg
.block_params(builder.func.layout.entry_block().unwrap());
let (ptr, len) = if func_args.len() < 4 {
return;
} else {
// If a function named `realloc` has at least two arguments, we assume the
// first arguments are the pointer and requested allocation size.
(func_args[2], func_args[3])
};
let retval = if retvals.len() < 1 {
return;
} else {
retvals[0]
};
builder.ins().call(check_realloc, &[vmctx, retval, ptr, len]);
}

#[cfg(feature = "wmemcheck")]
fn hook_malloc_usable_size_exit(&mut self, builder: &mut FunctionBuilder, retvals: &[ir::Value]) {
let check_malloc_usable_size = self.builtin_functions.check_malloc_usable_size(builder.func);
let vmctx = self.vmctx_val(&mut builder.cursor());
let func_args = builder
.func
.dfg
.block_params(builder.func.layout.entry_block().unwrap());
let ptr = if func_args.len() < 3 {
return;
} else {
func_args[2]
};
let retval = if retvals.len() < 1 {
return;
} else {
retvals[0]
};
builder.ins().call(check_malloc_usable_size, &[vmctx, retval, ptr]);
}

#[cfg(feature = "wmemcheck")]
fn hook_posix_memalign_exit(&mut self, builder: &mut FunctionBuilder) {
let check_posix_memalign = self.builtin_functions.check_posix_memalign(builder.func);
let vmctx = self.vmctx_val(&mut builder.cursor());
let func_args = builder
.func
.dfg
.block_params(builder.func.layout.entry_block().unwrap());
let (outptr, _alignment, size) = if func_args.len() < 5 {
return;
} else {
// If a function named `malloc` has at least one argument, we assume the
// first argument is the requested allocation size.
(func_args[2], func_args[3], func_args[4])
};
builder.ins().call(check_posix_memalign, &[vmctx, outptr, size]);
}

#[cfg(feature = "wmemcheck")]
fn hook_aligned_alloc(&mut self, builder: &mut FunctionBuilder, retvals: &[ir::Value]) {
let check_malloc = self.builtin_functions.check_malloc(builder.func);
let vmctx = self.vmctx_val(&mut builder.cursor());
let func_args = builder
.func
.dfg
.block_params(builder.func.layout.entry_block().unwrap());
let (_alignment, size) = if func_args.len() < 4 {
return;
} else {
// If a function named `malloc` has at least one argument, we assume the
// first argument is the requested allocation size.
(func_args[2], func_args[3])
};
let retval = if retvals.len() < 1 {
return;
} else {
retvals[0]
};
builder.ins().call(check_malloc, &[vmctx, retval, size]);
}

#[cfg(feature = "wmemcheck")]
fn hook_free_exit(&mut self, builder: &mut FunctionBuilder) {
let check_free = self.builtin_functions.check_free(builder.func);
Expand Down Expand Up @@ -927,17 +1033,10 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
}

#[cfg(feature = "wmemcheck")]
fn check_malloc_start(&mut self, builder: &mut FunctionBuilder) {
let malloc_start = self.builtin_functions.malloc_start(builder.func);
let vmctx = self.vmctx_val(&mut builder.cursor());
builder.ins().call(malloc_start, &[vmctx]);
}

#[cfg(feature = "wmemcheck")]
fn check_free_start(&mut self, builder: &mut FunctionBuilder) {
let free_start = self.builtin_functions.free_start(builder.func);
fn hook_memcheck_off(&mut self, builder: &mut FunctionBuilder) {
let memcheck_off = self.builtin_functions.memcheck_off(builder.func);
let vmctx = self.vmctx_val(&mut builder.cursor());
builder.ins().call(free_start, &[vmctx]);
builder.ins().call(memcheck_off, &[vmctx]);
}

#[cfg(feature = "wmemcheck")]
Expand Down Expand Up @@ -3088,11 +3187,22 @@ impl<'module_environment> crate::translate::FuncEnvironment

#[cfg(feature = "wmemcheck")]
if self.wmemcheck {
let func_name = self.current_func_name(builder);
if func_name == Some("malloc") {
self.check_malloc_start(builder);
} else if func_name == Some("free") {
self.check_free_start(builder);
match self.current_func_name(builder) {
Some("__wrap_malloc") | Some("malloc") =>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a comment here noting where the __wrap_* variants come from? That's useful both for its own sake (readers' understanding) but also so that if things change in the future with the external thing causing this need, we can re-evaluate.

self.hook_memcheck_off(builder),
Some("__wrap_calloc") | Some("calloc") =>
self.hook_memcheck_off(builder),
Some("__wrap_realloc") | Some("realloc") =>
self.hook_memcheck_off(builder),
Some("__wrap_malloc_usable_size") | Some("malloc_usable_size") =>
self.hook_memcheck_off(builder),
Some("__wrap_posix_memalign") | Some("posix_memalign") =>
self.hook_memcheck_off(builder),
Some("__wrap_aligned_alloc") | Some("aligned_alloc") =>
self.hook_memcheck_off(builder),
Some("__wrap_free") | Some("free") =>
self.hook_memcheck_off(builder),
_ => ()
}
}

Expand Down Expand Up @@ -3141,11 +3251,23 @@ impl<'module_environment> crate::translate::FuncEnvironment
#[cfg(feature = "wmemcheck")]
fn handle_before_return(&mut self, retvals: &[ir::Value], builder: &mut FunctionBuilder) {
if self.wmemcheck {
let func_name = self.current_func_name(builder);
if func_name == Some("malloc") {
self.hook_malloc_exit(builder, retvals);
} else if func_name == Some("free") {
self.hook_free_exit(builder);
let name = self.current_func_name(builder);
match name {
Some("__wrap_malloc") | Some("malloc") =>
self.hook_malloc_exit(builder, retvals),
Some("__wrap_calloc") | Some("calloc") =>
self.hook_calloc_exit(builder, retvals),
Some("__wrap_realloc") | Some("realloc") =>
self.hook_realloc_exit(builder, retvals),
Some("__wrap_malloc_usable_size") | Some("malloc_usable_size") =>
self.hook_malloc_usable_size_exit(builder, retvals),
Some("__wrap_posix_memalign") | Some("posix_memalign") =>
self.hook_posix_memalign_exit(builder),
Some("__wrap_aligned_alloc") | Some("aligned_alloc") =>
self.hook_aligned_alloc(builder, retvals),
Some("__wrap_free") | Some("free") =>
self.hook_free_exit(builder),
_ => ()
}
}
}
Expand Down
22 changes: 16 additions & 6 deletions crates/environ/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ macro_rules! foreach_builtin_function {
// Invoked before malloc returns.
#[cfg(feature = "wmemcheck")]
check_malloc(vmctx: vmctx, addr: i32, len: i32) -> i32;
// Invoked before calloc returns.
#[cfg(feature = "wmemcheck")]
check_calloc(vmctx: vmctx, addr: i32, count: i32, size: i32) -> i32;
// Invoked before realloc returns.
#[cfg(feature = "wmemcheck")]
check_realloc(vmctx: vmctx, end_addr: i32, start_addr: i32, len: i32) -> i32;
// Invoked before malloc_usable_size returns.
#[cfg(feature = "wmemcheck")]
check_malloc_usable_size(vmctx: vmctx, len: i32, addr: i32) -> i32;
// Invoked before posix_memalign returns.
#[cfg(feature = "wmemcheck")]
check_posix_memalign(vmctx: vmctx, outptr: i32, size: i32) -> i32;
// Invoked before the free returns.
#[cfg(feature = "wmemcheck")]
check_free(vmctx: vmctx, addr: i32) -> i32;
Expand All @@ -53,19 +65,17 @@ macro_rules! foreach_builtin_function {
// Invoked before a store is executed.
#[cfg(feature = "wmemcheck")]
check_store(vmctx: vmctx, num_bytes: i32, addr: i32, offset: i32) -> i32;
// Invoked after malloc is called.
#[cfg(feature = "wmemcheck")]
malloc_start(vmctx: vmctx);
// Invoked after free is called.
#[cfg(feature = "wmemcheck")]
free_start(vmctx: vmctx);
// Invoked when wasm stack pointer is updated.
#[cfg(feature = "wmemcheck")]
update_stack_pointer(vmctx: vmctx, value: i32);
// Invoked before memory.grow is called.
#[cfg(feature = "wmemcheck")]
update_mem_size(vmctx: vmctx, num_bytes: i32);

// Invoked before stuff is called.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we be more specific than "stuff" here?

#[cfg(feature = "wmemcheck")]
memcheck_off(vmctx: vmctx);

// Drop a non-stack GC reference (eg an overwritten table entry)
// once it will no longer be used again. (Note: `val` is not of type
// `reference` because it needn't appear in any stack maps, as it
Expand Down
71 changes: 61 additions & 10 deletions crates/wasmtime/src/runtime/vm/libcalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,40 @@ unsafe fn check_malloc(
Ok(0)
}

// Hook for validating calloc using wmemcheck_state.
#[cfg(feature = "wmemcheck")]
unsafe fn check_calloc(store: &mut dyn VMStore, instance: &mut Instance, addr: u32, count: u32, size: u32) -> Result<u32> {
check_malloc(store, instance, addr, count * size)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a note here that since we aren't tracking undefined-ness, we don't have to care about the other semantic difference between calloc and malloc, namely that memory is zeroed?

}

// Hook for validating realloc using wmemcheck_state.
#[cfg(feature = "wmemcheck")]
unsafe fn check_realloc(store: &mut dyn VMStore, instance: &mut Instance, end_addr: u32, start_addr: u32, len: u32) -> Result<u32> {
check_free(store, instance, start_addr)?;
check_malloc(store, instance, end_addr, len)
}

// Hook for validating malloc_usable_size using wmemcheck_state.
#[cfg(feature = "wmemcheck")]
unsafe fn check_malloc_usable_size(store: &mut dyn VMStore, instance: &mut Instance, len: u32, addr: u32) -> Result<u32> {
check_free(store, instance, addr)?;
check_malloc(store, instance, addr, len)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a correct implementation of malloc_usable_size's semantics? The man page on my system says that it returns the size of a memory block; unclear to me whether we need a hook for that, even, to track alloc'd/free state?

}


// Hook for validating posix_memalign using wmemcheck_state.
#[cfg(feature = "wmemcheck")]
unsafe fn check_posix_memalign(store: &mut dyn VMStore, instance: &mut Instance, outptr: u32, size: u32) -> Result<u32> {
for (_, entry) in instance.exports() {
if let wasmtime_environ::EntityIndex::Memory(mem_index) = entry {
let mem = instance.get_memory(*mem_index);
let out = *(mem.base.offset(outptr as isize) as *mut u32);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We definitely need to do a bounds-check here -- could you use the safe APIs for the memory to access the out-pointer?

This is also little-endian-specific code; we'll want to get a [u8; 4] and do an explicit conversion with u32::from_le_bytes. (Wasmtime supports one big-endian architecture, s390x!)

return check_malloc(store, instance, out, size)
}
}
todo!("Why is there no memory?")
}

// Hook for validating free using wmemcheck_state.
#[cfg(feature = "wmemcheck")]
unsafe fn check_free(_store: &mut dyn VMStore, instance: &mut Instance, addr: u32) -> Result<u32> {
Expand All @@ -1158,6 +1192,30 @@ unsafe fn check_free(_store: &mut dyn VMStore, instance: &mut Instance, addr: u3
Ok(0)
}

#[cfg(feature = "wmemcheck")]
fn log_allocation_previous_to(instance: &mut Instance, addr: usize) {
if let Some(wmemcheck_state) = &mut instance.wmemcheck_state {
if let Some((prev_malloc, prev_len)) = wmemcheck_state.malloc_previous_to(addr) {
println!("previous malloc'd range was {:#x}..{:#x}", prev_malloc, prev_malloc + prev_len);
for (_, entry) in instance.exports() {
if let wasmtime_environ::EntityIndex::Memory(mem_index) = entry {
let mem = instance.get_memory(*mem_index);
for i in 0..prev_len {
if i > 0 && i % 40 == 0 {
println!();
}
unsafe {
print!("{:02x} ", *mem.base.offset((prev_malloc + i) as isize));
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like code that could be useful for debugging, but as above with the posix_memalign out-pointer fetch, the memory accesses are unsafe, and also dumping the contents of previously allocated blocks might be too verbose. I think I'd prefer to omit this code for now; ultimately the right answer for any problem that needs examination of memory state like this is probably using a debugger (we have better support planned!) possibly with breakpoints on allocation events or trap-to-debugger on wmemcheck errors.

}
println!();
break
}
}
}
}
}

// Hook for validating load using wmemcheck_state.
#[cfg(feature = "wmemcheck")]
fn check_load(
Expand All @@ -1174,6 +1232,7 @@ fn check_load(
return Ok(0);
}
Err(InvalidRead { addr, len }) => {
log_allocation_previous_to(instance, addr);
bail!("Invalid load at addr {:#x} of size {}", addr, len);
}
Err(OutOfBounds { addr, len }) => {
Expand Down Expand Up @@ -1203,6 +1262,7 @@ fn check_store(
return Ok(0);
}
Err(InvalidWrite { addr, len }) => {
log_allocation_previous_to(instance, addr);
bail!("Invalid store at addr {:#x} of size {}", addr, len)
}
Err(OutOfBounds { addr, len }) => {
Expand All @@ -1216,17 +1276,8 @@ fn check_store(
Ok(0)
}

// Hook for turning wmemcheck load/store validation off when entering a malloc function.
#[cfg(feature = "wmemcheck")]
fn malloc_start(_store: &mut dyn VMStore, instance: &mut Instance) {
if let Some(wmemcheck_state) = &mut instance.wmemcheck_state {
wmemcheck_state.memcheck_off();
}
}

// Hook for turning wmemcheck load/store validation off when entering a free function.
#[cfg(feature = "wmemcheck")]
fn free_start(_store: &mut dyn VMStore, instance: &mut Instance) {
fn memcheck_off(_store: &mut dyn VMStore, instance: &mut Instance) {
if let Some(wmemcheck_state) = &mut instance.wmemcheck_state {
wmemcheck_state.memcheck_off();
}
Expand Down
27 changes: 24 additions & 3 deletions crates/wmemcheck/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,21 @@ pub struct Wmemcheck {
pub flag: bool,
}

impl Wmemcheck {
pub fn malloc_previous_to(&self, addr: usize) -> Option<(usize, usize)> {
let mut best: Option<(usize, usize)> = None;
for (base, len) in self.mallocs.iter() {
if let Some((prev_base, _)) = best {
if prev_base < *base && *base <= addr {
best = Some((*base, *len));
}
} else {
best = Some((*base, *len));
}
}
best
}
}
/// Error types for memory checker.
#[derive(Debug, PartialEq)]
pub enum AccessError {
Expand Down Expand Up @@ -51,7 +66,10 @@ impl Wmemcheck {
}

/// Updates memory checker memory state metadata when malloc is called.
pub fn malloc(&mut self, addr: usize, len: usize) -> Result<(), AccessError> {
pub fn malloc(&mut self, addr: usize, start_len: usize) -> Result<(), AccessError> {
// round up to multiple of 4
let len = (start_len + 3) & !3;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we put this under an option? Perhaps "wmemcheck granularity" or similar?


if !self.is_in_bounds_heap(addr, len) {
return Err(AccessError::OutOfBounds {
addr: addr,
Expand Down Expand Up @@ -101,12 +119,12 @@ impl Wmemcheck {
len: len,
});
}
MemState::ValidToWrite => {
/* MemState::ValidToWrite => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this commented out? (If the checks were causing issues, could we put the exclusion under an appropriately named and documented option?)

return Err(AccessError::InvalidRead {
addr: addr,
len: len,
});
}
} */
_ => {}
}
}
Expand Down Expand Up @@ -140,6 +158,9 @@ impl Wmemcheck {

/// Updates memory checker memory state metadata when free is called.
pub fn free(&mut self, addr: usize) -> Result<(), AccessError> {
if addr == 0 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good find! Can we add a comment here noting the implemented semantics (free of a null pointer is a no-op)?

return Ok(());
}
if !self.mallocs.contains_key(&addr) {
return Err(AccessError::InvalidFree { addr: addr });
}
Expand Down