diff --git a/build/Makefile b/build/Makefile index 750571fc..2eb267b7 100644 --- a/build/Makefile +++ b/build/Makefile @@ -4,7 +4,7 @@ JULIA ?= julia DLEXT := $(shell $(JULIA) --startup-file=no -e 'using Libdl; print(Libdl.dlext)') TARGET="../compiled" -DEST="/usr/local" +DEST="/usr/local/moleculargraphjl" MYLIB_INCLUDES = $(TARGET)/include/julia_init.h $(TARGET)/include/libmoleculargraph.h MYLIB_PATH := $(TARGET)/lib/libmoleculargraph.$(DLEXT) @@ -17,9 +17,8 @@ $(MYLIB_PATH) $(MYLIB_INCLUDES): ./build.jl ../src/MolecularGraph.jl # For MacOS testing link: - cp -a $(TARGET)/lib/* $(DEST)/lib - cp -a $(TARGET)/share/* $(DEST)/share - cp -a $(TARGET)/include/* $(DEST)/include + mkdir -p $(DEST) + cp -a $(TARGET)/* $(DEST) install_name_tool -add_rpath $(DEST)/lib $(DEST)/lib/libmoleculargraph.$(DLEXT) install_name_tool -add_rpath $(DEST)/lib/julia $(DEST)/lib/libmoleculargraph.$(DLEXT) diff --git a/scripts/integrate_default_queries.jl b/scripts/integrate_default_queries.jl index 8b0612dd..b4cb9550 100644 --- a/scripts/integrate_default_queries.jl +++ b/scripts/integrate_default_queries.jl @@ -37,6 +37,7 @@ function run() "str_alert_221", "str_alert_222", "str_alert_247", "str_alert_1003", "str_alert_246", "str_alert_381" ] + @info "vprops" length(vprops) - length(skip) for (u, v) in combinations(length(vprops)) ukey = vprops[u]["key"] vkey = vprops[v]["key"] @@ -95,5 +96,4 @@ function run() YAML.write_file(OUTPUT_FILE, vprops) end - -run() \ No newline at end of file +@time run() \ No newline at end of file diff --git a/scripts/python_test.py b/scripts/python_test.py index 34a61d99..3d7caca5 100644 --- a/scripts/python_test.py +++ b/scripts/python_test.py @@ -7,7 +7,7 @@ def jl_init(): dlext = "dylib" if platform.system() == "Darwin" else "so" - libdir = Path("/usr/local/lib") + libdir = Path("/usr/local/moleculargraphjl/lib") jl = CDLL(libdir / f"libmoleculargraph.{dlext}", RTLD_GLOBAL) jl.jl_init_with_image(bytes(libdir), f"libmoleculargraph.{dlext}".encode()) return jl diff --git a/src/stereo.jl b/src/stereo.jl index 6d16b81d..b988729d 100644 --- a/src/stereo.jl +++ b/src/stereo.jl @@ -8,6 +8,17 @@ export stereocenter_from_smiles!, stereocenter_from_sdf2d!, stereobond_from_smiles!, stereobond_from_sdf2d! + +""" + Stereocenter{T} <: AbstractDict{T,Tuple{T,T,T,Bool}} + +A graph-level property that describes chirality of the molecule. + +The dict key represents the stereocenter vertex, and the value is a 4-tuple +in the format (l, v1, v2, is_clockwise). `is_clockwise` is a boolean value indicating +whether nodes `v1` and `v2` are arranged clockwise when viewed from node `l` +toward the stereocenter. +""" struct Stereocenter{T} <: AbstractDict{T,Tuple{T,T,T,Bool}} mapping::Dict{T,Tuple{T,T,T,Bool}} end @@ -32,6 +43,15 @@ function remap(stereo::Stereocenter{T}, vmap::Dict) where T # vmap[old] -> new end +""" + Stereobond{T} <: AbstractDict{Edge{T},Tuple{T,T,Bool}} + +A graph-level property that describes cis-trans isomerism of the molecule. + +The dict key represents the stereogenic double bond edge, and the value is a 3-tuple +in the format (e1, e2, is_cis). `is_cis` is a boolean value indicating +whether edges `e1` and `e2` are placed in the same side of the stereogenic bond. +""" struct Stereobond{T} <: AbstractDict{Edge{T},Tuple{T,T,Bool}} mapping::Dict{Edge{T},Tuple{T,T,Bool}} end @@ -151,7 +171,7 @@ Return stereocenter information obtained from 2D SDFile. function stereocenter_from_sdf2d(g::SimpleGraph{T}, e_order, e_notation, e_isordered, v_coords2d) where T centers = Stereocenter{T}() edgerank = Dict(e => i for (i, e) in enumerate(edges(g))) - comments = Dict{Int,String}() + comments = String[] for i in vertices(g) degree(g, i) in (3, 4) || continue nbrs = ordered_neighbors(g, i) @@ -169,37 +189,56 @@ function stereocenter_from_sdf2d(g::SimpleGraph{T}, e_order, e_notation, e_isord end push!(drs, :unspecified) end - length(drs) == degree(g, i) || (@debug "multiple bond"; continue) + length(drs) == degree(g, i) || (@debug "$(i): multiple bond"; continue) upcnt = count(x -> x === :up, drs) dwcnt = count(x -> x === :down, drs) - (upcnt == 0 && dwcnt == 0) && (@debug "unspecified"; continue) + (upcnt == 0 && dwcnt == 0) && (@debug "$(i): unspecified"; continue) sortorder = [1, map(x -> x + 1, anglesort(v_coords2d, i, nbrs[1], nbrs[2:end]))...] - ons = nbrs[sortorder] - ods = drs[sortorder] + ons = nbrs[sortorder] # node indices of neighbors in angular order + ods = drs[sortorder] # direction symbols of neighbors in angular order if length(nbrs) == 3 - if upcnt + dwcnt == 1 - # if there is implicit hydrogen, an up-wedge -> clockwise, a down-edge -> ccw - centers[i] = (ons[1], ons[2], ons[3], dwcnt == 0) + if upcnt == 1 && dwcnt == 1 || upcnt + dwcnt == 3 + @debug "$(i): a reference plane cannot be defined $(ons) $(ods)" + push!(comments, "$(i): a reference plane cannot be defined") + elseif upcnt == 2 || dwcnt == 2 + centers[i] = (ons[1], ons[2], ons[3], upcnt == 0) else - @debug "Ambiguous stereochemistry (maybe need explicit hydrogen)" - comments[i] = "$(ons) $(ods) Ambiguous stereochemistry (maybe need explicit hydrogen)" - continue + centers[i] = (ons[1], ons[2], ons[3], dwcnt == 0) end - elseif (ods[1] !== :unspecified && ods[3] !== :unspecified && ods[1] !== ods[3] || - ods[2] !== :unspecified && ods[4] !== :unspecified && ods[2] !== ods[4]) - @debug "Ambiguous stereochemistry (opposite direction wedges at opposite side)" - comments[i] = "$(ons) $(ods) Ambiguous stereochemistry (opposite direction wedges at opposite side)" - continue - elseif (ods[1] !== :unspecified && ods[1] === ods[2] || - ods[2] !== :unspecified && ods[2] === ods[3] || - ods[3] !== :unspecified && ods[3] === ods[4] || - ods[4] !== :unspecified && ods[4] === ods[1]) - @debug "Ambiguous stereochemistry (same direction wedges at same side)" - comments[i] = "$(ons) $(ods) Ambiguous stereochemistry (same direction wedges at same side)" continue - elseif upcnt != 0 + end + if upcnt == 4 || dwcnt == 4 + @debug "$(i): a reference plane cannot be defined $(ons) $(ods)" + push!(comments, "$(i): a reference plane cannot be defined") + elseif upcnt == 3 + centers[i] = (ons[1], ons[2], ons[3], ods[1] === :up && ods[3] === :up) + elseif dwcnt == 3 + centers[i] = (ons[1], ons[2], ons[3], ods[2] === :down && ods[4] === :down) + elseif upcnt == 2 + if (ods[1] === :up && ods[3] === :up) || (ods[2] === :up && ods[4] === :up) + centers[i] = (ons[1], ons[2], ons[3], ods[1] === :up) + else + @debug "$(i): adjacent wedges lie on the same side of the reference plane $(ons) $(ods)" + push!(comments, "$(i): adjacent wedges lie on the same side of the reference plane") + end + elseif dwcnt == 2 + if (ods[1] === :down && ods[3] === :down) || (ods[2] === :down && ods[4] === :down) + centers[i] = (ons[1], ons[2], ons[3], ods[1] !== :down) + else + @debug "$(i): adjacent wedges lie on the same side of the reference plane $(ons) $(ods)" + push!(comments, "$(i): adjacent wedges lie on the same side of the reference plane") + end + elseif upcnt == 1 && dwcnt == 1 + if ((ods[1] === :unspecified && ods[3] === :unspecified) + || (ods[2] === :unspecified && ods[4] === :unspecified)) + @debug "$(i): non-adjacent wedges lie on the opposite side of the reference plane $(ons) $(ods)" + push!(comments, "$(i): non-adjacent wedges lie on the opposite side of the reference plane") + else + centers[i] = (ons[1], ons[2], ons[3], ods[1] === :up || ods[3] === :up) + end + elseif upcnt == 1 centers[i] = (ons[1], ons[2], ons[3], ods[1] === :up || ods[3] === :up) - else # down wedges only + else # dwcnt == 1 centers[i] = (ons[1], ons[2], ons[3], ods[2] === :down || ods[4] === :down) end end @@ -222,8 +261,8 @@ Set stereocenter information obtained from 2D SDFile. function stereocenter_from_sdf2d!(mol::MolGraph) centers, comments = stereocenter_from_sdf2d(mol) mol.gprops[:stereocenter] = centers - if length(comments) > 0 - mol.gprops[:stereocenter_ignored] = comments + if length(comments) > 0 + mol.gprops[:stereocenter_ignored] = join(comments, "; ") end end @@ -308,6 +347,7 @@ Return cis-trans diastereomerism information obtained from SMILES. """ function stereobond_from_smiles(g::SimpleGraph{T}, e_order, e_direction) where T stereobonds = Stereobond{T}() + comments = String[] edgerank = Dict(e => i for (i, e) in enumerate(edges(g))) for (i, e) in enumerate(edges(g)) e_order[i] == 2 || continue @@ -335,11 +375,15 @@ function stereobond_from_smiles(g::SimpleGraph{T}, e_order, e_direction) where T end (isempty(sds) || isempty(dds)) && continue # no :up or :down bonds # Conflicting cis/trans will be ignored (adopts OpenSMILES specs) - length(sds) == 2 && sds[1][2] == sds[2][2] && continue - length(dds) == 2 && dds[1][2] == dds[2][2] && continue + if (length(sds) == 2 && sds[1][2] == sds[2][2]) || ( + length(dds) == 2 && dds[1][2] == dds[2][2]) + @debug "$(e): conflicting up and down bonds $(sds) $(dds)" + push!(comments, "$(e): conflicting up and down bonds") + continue + end stereobonds[e] = (sds[1][1], dds[1][1], sds[1][2] !== dds[1][2]) end - return stereobonds + return stereobonds, comments end stereobond_from_smiles(mol::SimpleMolGraph) = stereobond_from_smiles( @@ -353,8 +397,13 @@ stereobond_from_smiles(mol::SimpleMolGraph) = stereobond_from_smiles( Set cis-trans diastereomerism information obtained from SMILES. """ -stereobond_from_smiles!(mol::MolGraph - ) = begin mol.gprops[:stereobond] = stereobond_from_smiles(mol) end +function stereobond_from_smiles!(mol::MolGraph) + bonds, comments = stereobond_from_smiles(mol) + mol.gprops[:stereobond] = bonds + if length(comments) > 0 + mol.gprops[:stereobond_ignored] = join(comments, "; ") + end +end # TODO: axial chirality # TODO: hypervalent chirality diff --git a/test/stereo.jl b/test/stereo.jl index 6755d334..9cd81b8f 100644 --- a/test/stereo.jl +++ b/test/stereo.jl @@ -76,8 +76,8 @@ end edges = Edge.([(1,2), (1,3), (1,4), (1,5)]) default_config = Dict{Symbol,Any}( :on_init => MolecularGraph.sdf_on_init!, :updater => MolecularGraph.sdf_on_update!) - uns1 = MolGraph(edges, atoms, bonds, config_map=default_config) - @test isempty(get_prop(uns1, :stereocenter)) + uns = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(uns, :stereocenter)) bonds = [ SDFBond(1, 0), @@ -139,8 +139,8 @@ end SDFBond(1, 6), SDFBond(1, 0) ] - wrong1 = MolGraph(edges, atoms, bonds, config_map=default_config) - @test isempty(get_prop(wrong1, :stereocenter)) + mol7 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test get_prop(mol7, :stereocenter)[1] == (2, 3, 5, true) bonds = [ SDFBond(1, 1), @@ -148,8 +148,26 @@ end SDFBond(1, 1), SDFBond(1, 1) ] - wrong2 = MolGraph(edges, atoms, bonds, config_map=default_config) - @test isempty(get_prop(wrong2, :stereocenter)) + mol8 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test get_prop(mol8, :stereocenter)[1] == (2, 3, 5, true) + + bonds = [ + SDFBond(1, 1), + SDFBond(1, 0), + SDFBond(1, 6), + SDFBond(1, 1) + ] + mol9 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test get_prop(mol9, :stereocenter)[1] == (2, 3, 5, true) + + bonds = [ + SDFBond(1, 6), + SDFBond(1, 1), + SDFBond(1, 1), + SDFBond(1, 6) + ] + mol10 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test get_prop(mol10, :stereocenter)[1] == (2, 3, 5, false) bonds = [ SDFBond(1, 1), @@ -157,8 +175,11 @@ end SDFBond(1, 0), SDFBond(1, 6) ] - wrong3 = MolGraph(edges, atoms, bonds, config_map=default_config) - @test isempty(get_prop(wrong3, :stereocenter)) + wrong1 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(wrong1, :stereocenter)) + @test haskey(wrong1.gprops, :stereocenter_ignored) + # serialization check + @test nv(MolGraph(to_json(wrong1))) == 5 bonds = [ SDFBond(1, 1), @@ -166,8 +187,39 @@ end SDFBond(1, 0), SDFBond(1, 0) ] + wrong2 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(wrong2, :stereocenter)) + @test haskey(wrong2.gprops, :stereocenter_ignored) + + bonds = [ + SDFBond(1, 1), + SDFBond(1, 1), + SDFBond(1, 1), + SDFBond(1, 1) + ] + wrong3 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(wrong3, :stereocenter)) + @test haskey(wrong3.gprops, :stereocenter_ignored) + + bonds = [ + SDFBond(1, 6), + SDFBond(1, 6), + SDFBond(1, 6), + SDFBond(1, 6) + ] wrong4 = MolGraph(edges, atoms, bonds, config_map=default_config) @test isempty(get_prop(wrong4, :stereocenter)) + @test haskey(wrong4.gprops, :stereocenter_ignored) + + bonds = [ + SDFBond(1, 1), + SDFBond(1, 6), + SDFBond(1, 1), + SDFBond(1, 6) + ] + wrong5 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(wrong5, :stereocenter)) + @test haskey(wrong5.gprops, :stereocenter_ignored) # degree=3 atoms = [ @@ -182,26 +234,69 @@ end SDFBond(1, 0) ] edges = Edge.([(1,2), (1,3), (1,4)]) - uns2 = MolGraph(edges, atoms, bonds, config_map=default_config) - @test isempty(get_prop(uns2, :stereocenter)) + implh_uns = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(implh_uns, :stereocenter)) + @test !haskey(implh_uns.gprops, :stereocenter_ignored) bonds = [ - SDFBond(1, 1), SDFBond(1, 0), SDFBond(1, 0) + SDFBond(1, 1), + SDFBond(1, 0), + SDFBond(1, 0) ] implh1 = MolGraph(edges, atoms, bonds, config_map=default_config) @test get_prop(implh1, :stereocenter)[1] == (2, 3, 4, true) bonds = [ - SDFBond(1, 0), SDFBond(1, 6), SDFBond(1, 0) + SDFBond(1, 0), + SDFBond(1, 6), + SDFBond(1, 0) ] implh2 = MolGraph(edges, atoms, bonds, config_map=default_config) @test get_prop(implh2, :stereocenter)[1] == (2, 3, 4, false) bonds = [ - SDFBond(1, 0), SDFBond(1, 6), SDFBond(1, 1) + SDFBond(1, 6), + SDFBond(1, 6), + SDFBond(1, 0) ] - wrong5 = MolGraph(edges, atoms, bonds, config_map=default_config) - @test isempty(get_prop(wrong5, :stereocenter)) + implh3 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test get_prop(implh3, :stereocenter)[1] == (2, 3, 4, true) + + bonds = [ + SDFBond(1, 0), + SDFBond(1, 6), + SDFBond(1, 1) + ] + implh_wrong = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(implh_wrong, :stereocenter)) + @test haskey(implh_wrong.gprops, :stereocenter_ignored) + + bonds = [ + SDFBond(1, 1), + SDFBond(1, 1), + SDFBond(1, 1) + ] + implh_wrong2 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(implh_wrong2, :stereocenter)) + @test haskey(implh_wrong2.gprops, :stereocenter_ignored) + + bonds = [ + SDFBond(1, 6), + SDFBond(1, 6), + SDFBond(1, 6) + ] + implh_wrong3 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(implh_wrong3, :stereocenter)) + @test haskey(implh_wrong3.gprops, :stereocenter_ignored) + + bonds = [ + SDFBond(1, 6), + SDFBond(1, 1), + SDFBond(1, 6) + ] + implh_wrong4 = MolGraph(edges, atoms, bonds, config_map=default_config) + @test isempty(get_prop(implh_wrong4, :stereocenter)) + @test haskey(implh_wrong4.gprops, :stereocenter_ignored) # transformed atoms = [ @@ -271,7 +366,10 @@ end @test get_prop(cis, :stereobond)[Edge(1 => 3)] == (2, 4, true) # cis conflict = smilestomol("C/C=C(/C)/C") - @test isempty(get_prop(conflict, :stereobond)) # ignored + @test isempty(get_prop(conflict, :stereobond)) + @test haskey(conflict.gprops, :stereobond_ignored) + # serialization check + @test nv(MolGraph(to_json(conflict))) == 5 mol3 = smilestomol("C\\C([H])=C([H])/C") @test get_prop(mol3, :stereobond)[Edge(2 => 4)] == (1, 6, true)