Bump LLVM and GPUCompiler versions + related fixes #82

Merged
merged 8 commits on Oct 8, 2024
Changes from all commits
6 changes: 3 additions & 3 deletions .github/workflows/CI.yml
@@ -19,16 +19,16 @@ jobs:
fail-fast: false
matrix:
version:
- '1.9'
- '~1.10.0-0'
- '1.10'
- 'pre'
- 'nightly'
os:
- ubuntu-latest
arch:
- x64
steps:
- uses: actions/checkout@v3
- uses: julia-actions/setup-julia@v1
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
8 changes: 4 additions & 4 deletions Project.toml
@@ -1,7 +1,7 @@
name = "AllocCheck"
uuid = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
authors = ["JuliaHub Inc."]
version = "0.1.3"
version = "0.2.0"

[deps]
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
@@ -10,11 +10,11 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"

[compat]
GPUCompiler = "0.24, 0.25, 0.26"
LLVM = "6.3"
GPUCompiler = "0.27"
LLVM = "9.1"
ExprTools = "0.1"
MacroTools = "0.5"
julia = "1.9"
julia = "1.10"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
26 changes: 22 additions & 4 deletions src/AllocCheck.jl
@@ -93,12 +93,28 @@ Find all static allocation sites in the provided LLVM IR.

This function modifies the LLVM module in-place, effectively trashing it.
"""
function find_allocs!(mod::LLVM.Module, meta; ignore_throw=true)
function find_allocs!(mod::LLVM.Module, meta, entry_name::String; ignore_throw=true, invoke_entry=false)
Review comment (Member): Can you document what invoke_entry does?

(; entry, compiled) = meta

errors = []
entry = LLVM.ModuleFunctionSet(mod)[entry_name]
worklist = LLVM.Function[ entry ]
seen = LLVM.Function[ entry ]
if invoke_entry
@assert startswith(name(entry), "jfptr")
f = pop!(worklist)
for block in blocks(f)
for inst in instructions(block)
if isa(inst, LLVM.CallInst)
decl = called_operand(inst)
if decl isa LLVM.Function && length(blocks(decl)) > 0 && !in(decl, seen)
push!(worklist, decl)
push!(seen, decl)
end
end
end
end
end
while !isempty(worklist)
f = pop!(worklist)

@@ -202,12 +218,14 @@ function check_allocs(@nospecialize(func), @nospecialize(types); ignore_throw=tr
end
source = GPUCompiler.methodinstance(Base._stable_typeof(func), Base.to_tuple_type(types))
target = DefaultCompilerTarget()
job = CompilerJob(source, config)
job = CompilerJob(source, alloc_config(:specfunc))
allocs = JuliaContext() do ctx
mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=false, cleanup=false)
optimize!(job, mod)
(; entry, compiled) = meta
entry_name = name(entry)
optimize!(mod)

allocs = find_allocs!(mod, meta; ignore_throw)
allocs = find_allocs!(mod, meta, entry_name; ignore_throw, invoke_entry=false)
# display(mod)
# dispose(mod)
allocs
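To make the reviewer's question above concrete: below is a minimal sketch of how the new three-argument `find_allocs!` is driven, mirroring the updated `check_allocs` body in this file. `find_allocs!`, `alloc_config`, and `optimize!` are internal (unexported) AllocCheck helpers, and the analyzed function `g` is hypothetical. With `invoke_entry=false` the named entry is the specialized function itself and is scanned directly; with `invoke_entry=true` (the `compile_callable` path) the entry is expected to be a `jfptr_*` wrapper, and the worklist is seeded with the functions it calls instead.

```julia
using GPUCompiler, LLVM
using AllocCheck: alloc_config, find_allocs!, optimize!

g(x) = x + 1.0   # hypothetical function to analyze

source = GPUCompiler.methodinstance(typeof(g), Tuple{Float64})
job = GPUCompiler.CompilerJob(source, alloc_config(:specfunc))

allocs = GPUCompiler.JuliaContext() do ctx
    mod, meta = GPUCompiler.compile(:llvm, job; validate=false, optimize=false, cleanup=false)
    entry_name = LLVM.name(meta.entry)   # record the entry name before optimization
    optimize!(mod)
    # :specfunc entry: the named function is the specialized body itself,
    # so there is no jfptr wrapper to look through.
    find_allocs!(mod, meta, entry_name; ignore_throw=true, invoke_entry=false)
end
@show length(allocs)
```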
9 changes: 5 additions & 4 deletions src/classify.jl
@@ -20,7 +20,8 @@ function classify_runtime_fn(name::AbstractString; ignore_throw::Bool)
if name in ("alloc_genericmemory", "genericmemory_copy", "genericmemory_copy_slice",
"string_to_genericmemory", "ptr_to_genericmemory", "array_copy", "alloc_string",
"alloc_array_1d", "alloc_array_2d", "alloc_array_3d", "gc_alloc_typed",
"gc_pool_alloc", "gc_pool_alloc_instrumented", "gc_big_alloc_instrumented"
"gc_small_alloc", "gc_pool_alloc", "gc_small_alloc_instrumented",
"gc_pool_alloc_instrumented", "gc_big_alloc_instrumented"
) || occursin(r"^box_.*", name)
return (:alloc, may_alloc)
elseif name in ("f__apply_latest", "f__apply_iterate", "f__apply_pure", "f__call_latest",
@@ -62,7 +63,7 @@ function fn_may_allocate(name::AbstractString; ignore_throw::Bool)
if name in ("egal__unboxed", "lock_value", "unlock_value", "get_nth_field_noalloc",
"load_and_lookup", "lazy_load_and_lookup", "box_bool", "box_int8",
"box_uint8", "excstack_state", "restore_excstack", "enter_handler",
"pop_handler", "f_typeof", "clock_now", "throw", "gc_queue_root", "gc_enable",
"pop_handler", "pop_handler_noexcept", "f_typeof", "clock_now", "throw", "gc_queue_root", "gc_enable",
"gc_disable_finalizers_internal", "gc_is_in_finalizer", "enable_gc_logging",
"gc_safepoint", "gc_collect", "genericmemory_owner", "get_pgcstack") || occursin(r"^unbox_.*", name)
return false # these functions never allocate
@@ -141,7 +142,7 @@ function resolve_allocations(call::LLVM.Value)
isnothing(match_) && return nothing
name = match_[2]

if name in ("gc_pool_alloc_instrumented", "gc_big_alloc_instrumented", "gc_alloc_typed")
if name in ("gc_pool_alloc_instrumented", "gc_small_alloc_instrumented", "gc_big_alloc_instrumented", "gc_alloc_typed")
type = resolve_static_jl_value_t(operands(call)[end-1])
return type !== nothing ? [(call, type)] : nothing
elseif name in ("alloc_array_1d", "alloc_array_2d", "alloc_array_3d")
@@ -179,7 +180,7 @@ function resolve_allocations(call::LLVM.Value)
typestr == "uint8pointer" && return [(call, Ptr{UInt8})]
typestr == "voidpointer" && return [(call, Ptr{Cvoid})]
@assert false # above is exhaustive
elseif name == "gc_pool_alloc"
elseif name in ("gc_pool_alloc", "gc_small_alloc")
seen = Set()
allocs = Tuple{LLVM.Instruction, Any}[]
for calluse in transitive_uses(call; unwrap = (use)->user(use) isa LLVM.BitCastInst)
61 changes: 22 additions & 39 deletions src/compiler.jl
@@ -10,7 +10,7 @@ function __init__()
tm[] = LLVM.JITTargetMachine(LLVM.triple(), cpu_name(), cpu_features();
optlevel = llvm_codegen_level(opt_level))
LLVM.asm_verbosity!(tm[], true)
lljit = LLVM.has_julia_ojit() ? LLVM.JuliaOJIT() : LLVM.LLJIT(; tm=tm[])
lljit = LLVM.JuliaOJIT()

jd_main = LLVM.JITDylib(lljit)

@@ -35,20 +35,11 @@ function __init__()
end
end

@static if LLVM.has_julia_ojit()
struct CompilerInstance
jit::LLVM.JuliaOJIT
lctm::Union{LLVM.LazyCallThroughManager, Nothing}
ism::Union{LLVM.IndirectStubsManager, Nothing}
end
else
struct CompilerInstance
jit::LLVM.LLJIT
lctm::Union{LLVM.LazyCallThroughManager, Nothing}
ism::Union{LLVM.IndirectStubsManager, Nothing}
end
struct CompilerInstance
jit::LLVM.JuliaOJIT
lctm::Union{LLVM.LazyCallThroughManager, Nothing}
ism::Union{LLVM.IndirectStubsManager, Nothing}
end

struct CompileResult{Success, F, TT, RT}
f_ptr::Ptr{Cvoid}
arg_types::Type{TT}
@@ -65,29 +56,16 @@ const tm = Ref{TargetMachine}() # for opt pipeline
# cache of kernel instances
const _kernel_instances = Dict{Any, Any}()
const compiler_cache = Dict{Any, CompileResult}()
const config = CompilerConfig(DefaultCompilerTarget(), NativeParams();
kernel=false, entry_abi = :specfunc, always_inline=false)
alloc_config(func_abi::Symbol) = CompilerConfig(DefaultCompilerTarget(), NativeParams();
kernel=false, entry_abi = func_abi, always_inline=false)

const NativeCompilerJob = CompilerJob{NativeCompilerTarget,NativeParams}
GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = true
GPUCompiler.runtime_module(::NativeCompilerJob) = Runtime

function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
triple = GPUCompiler.llvm_triple(job.config.target)
tm = GPUCompiler.llvm_machine(job.config.target)
if VERSION >= v"1.10-beta3"
@dispose pb = LLVM.PassBuilder(tm) begin
@dispose mpm = LLVM.NewPMModulePassManager(pb) begin
build_newpm_pipeline!(pb, mpm)
run!(mpm, mod, tm)
end
end
else
@dispose pm=LLVM.ModulePassManager() begin
build_oldpm_pipeline!(pm)
run!(pm, mod)
end
end
function optimize!(mod::LLVM.Module)
pipeline = LLVM.Interop.JuliaPipeline(opt_level=Base.JLOptions().opt_level)
run!(pipeline, mod)
end

"""
@@ -112,15 +90,17 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
function compile(@nospecialize(job::CompilerJob))
return JuliaContext() do ctx
mod, meta = GPUCompiler.compile(:llvm, job, validate=false)
optimize!(job, mod)
(; entry, compiled) = meta
entry_name = name(entry)
optimize!(mod)

clone = copy(mod)
analysis = find_allocs!(mod, meta; ignore_throw)
analysis = find_allocs!(mod, meta, entry_name; ignore_throw, invoke_entry=true)
# TODO: This is the wrong meta
return clone, meta, analysis
return clone, entry_name, analysis
end
end
function link(@nospecialize(job::CompilerJob), (mod, meta, analysis))
function link(@nospecialize(job::CompilerJob), (mod, entry_name, analysis))
return JuliaContext() do ctx
lljit = jit[].jit
jd = LLVM.JITDylib(lljit)
@@ -130,7 +110,7 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
GPUCompiler.ThreadSafeModule(mod)
end
LLVM.add!(lljit, jd, tsm)
f_ptr = pointer(LLVM.lookup(lljit, LLVM.name(meta.entry)))
f_ptr = pointer(LLVM.lookup(lljit, entry_name))
if f_ptr == C_NULL
throw(GPUCompiler.InternalCompilerError(job,
"Failed to compile @check_allocs function"))
@@ -142,7 +122,7 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
end
end
end
fun = GPUCompiler.cached_compilation(cache, source, config, compile, link)
fun = GPUCompiler.cached_compilation(cache, source, alloc_config(:func), compile, link)

# create a callable object that captures the function instance. we don't need to think
# about world age here, as GPUCompiler already does and will return a different object
@@ -153,7 +133,10 @@ end

function (f::CompileResult{Success, F, TT, RT})(args...) where {Success, F, TT, RT}
if Success
return abi_call(f.f_ptr, RT, TT, f.func, args...)
argsv = Any[args...]
GC.@preserve argsv begin
return ccall(f.f_ptr, Any, (Any, Ptr{Any}, UInt32), f.func, pointer(argsv), length(args))
end
else
error("@check_allocs function contains ", length(f.analysis), " allocations.")
end
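The rewritten callable at the end of this file replaces `abi_call` with a direct `ccall` through the `(Any, Ptr{Any}, UInt32)` signature, i.e. Julia's generic invoke convention: the function object, a pointer to an array of boxed arguments, and the argument count (the ABI of the `jfptr_*` wrapper produced by the `:func` entry ABI). Purely as an illustration of that convention — not AllocCheck's own code path — the runtime's `jl_apply_generic` entry point has the same shape:

```julia
# Call `+` through the same (Any, Ptr{Any}, UInt32) convention used above,
# via the runtime's generic dispatch entry point.
f = +
argsv = Any[1.0, 2.5]
result = GC.@preserve argsv begin
    ccall(:jl_apply_generic, Any, (Any, Ptr{Any}, UInt32),
          f, pointer(argsv), length(argsv))
end
@assert result === 3.5
```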
26 changes: 1 addition & 25 deletions src/compiler_utils.jl
@@ -20,29 +20,5 @@ function cpu_name()
end

function cpu_features()
if VERSION >= v"1.10.0-beta1"
return ccall(:jl_get_cpu_features, String, ())
end

@static if Sys.ARCH == :x86_64 ||
Sys.ARCH == :x86
return "+mmx,+sse,+sse2,+fxsr,+cx8" # mandated by Julia
else
return ""
end
end

if VERSION >= v"1.10-beta3"
function build_newpm_pipeline!(pb::LLVM.PassBuilder, mpm::LLVM.NewPMModulePassManager, speedup=2, size=0, lower_intrinsics=true,
dump_native=false, external_use=false, llvm_only=false,)
ccall(:jl_build_newpm_pipeline, Cvoid,
(LLVM.API.LLVMModulePassManagerRef, LLVM.API.LLVMPassBuilderRef, Cint, Cint, Cint, Cint, Cint, Cint),
mpm, pb, speedup, size, lower_intrinsics, dump_native, external_use, llvm_only)
end
else
function build_oldpm_pipeline!(pm::LLVM.ModulePassManager, opt_level=2, lower_intrinsics=true)
ccall(:jl_add_optimization_passes, Cvoid,
(LLVM.API.LLVMPassManagerRef, Cint, Cint),
pm, opt_level, lower_intrinsics)
end
return ccall(:jl_get_cpu_features, String, ())
end
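Dropping the pre-1.10 branches is safe because `jl_get_cpu_features` exists on every Julia version the package now supports; the simplified helper boils down to the single runtime call below (the printed feature string is illustrative and host-dependent):

```julia
# Feature string LLVM should target for the host CPU, e.g. "+sse4.2,+avx2,..."
features = ccall(:jl_get_cpu_features, String, ())
println(features)
```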
37 changes: 29 additions & 8 deletions test/runtests.jl
@@ -172,7 +172,11 @@ end
# The check should raise errors only for problematic argument types
@check_allocs mymul(x,y) = x * y
@test mymul(1.5, 2.5) == 1.5 * 2.5
@test_throws AllocCheckFailure mymul(rand(10,10), rand(10,10))
if VERSION < v"1.12-DEV"
@test_throws AllocCheckFailure mymul(rand(10,10), rand(10,10))
else
@test_broken false # TODO: investigate segfault above with --check-bounds=yes
end

# If provided, ignore_throw=false should include allocations that
# happen only on error paths
@@ -220,11 +224,11 @@ end
@test length(check_allocs(Base.mightalias, (Memory{Int},Memory{Int}))) == 0 # uses jl_genericmemory_owner (intercepted)
end

@test any(alloc.type == Base.RefValue{Int} for alloc in check_allocs(()->Ref{Int}(), ()))
@test any((alloc isa AllocationSite && alloc.type == Base.RefValue{Int}) for alloc in check_allocs(()->Ref{Int}(), ()))

allocs1 = check_allocs(()->Ref{Vector{Int64}}(Int64[]), ())
@test any(alloc.type == Base.RefValue{Vector{Int64}} for alloc in allocs1)
@test any(alloc.type == Vector{Int64} for alloc in allocs1)
@test any((alloc isa AllocationSite && alloc.type == Base.RefValue{Vector{Int64}}) for alloc in allocs1)
@test any((alloc isa AllocationSite && alloc.type == Vector{Int64}) for alloc in allocs1)
end

@testset "Error types" begin
@@ -327,8 +331,25 @@ Documentation for `issue64`.
v[i], v[j] = v[j], v[i]
v
end
let io = IOBuffer()
print(io, @doc issue64)
s = String(take!(io))
@test occursin("Documentation for `issue64`.", s)
@check_allocs function foo_with_union_rt(t::Tuple{Float64, Float64})
if rand((1, -1)) == 1
return t
else
return nothing
end
end

@testset "issues" begin
# issue #64
let io = IOBuffer()
print(io, @doc issue64)
s = String(take!(io))
@test occursin("Documentation for `issue64`.", s)
end

# issue #70
x = foo_with_union_rt((1.0, 1.5))
@test x === nothing || x === (1.0, 1.5)
x = foo_with_union_rt((1.0, 1.5))
@test x === nothing || x === (1.0, 1.5)
end
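For reference, a short usage sketch of the public API these tests exercise, under the versions pinned by this PR (Julia 1.10+, GPUCompiler 0.27, LLVM.jl 9.1). It mirrors the tests above rather than adding new behavior; `AllocCheckFailure` and `AllocationSite` are the names the suite itself uses (note the matrix case is marked broken on Julia 1.12-DEV with --check-bounds=yes).

```julia
using AllocCheck, Test

# @check_allocs turns any detected allocation into a runtime AllocCheckFailure.
@check_allocs mymul(x, y) = x * y
@test mymul(1.5, 2.5) == 3.75                                     # scalar path: allocation-free
@test_throws AllocCheckFailure mymul(rand(10, 10), rand(10, 10))  # matrix path allocates its result

# check_allocs reports the individual allocation sites instead of throwing.
allocs = check_allocs(() -> Ref{Int}(), ())
@test any(a -> a isa AllocationSite && a.type == Base.RefValue{Int}, allocs)
```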