From dda681bcf0a889ac7697dfdc74ad6e2b650b2662 Mon Sep 17 00:00:00 2001 From: Ekaterina Kazantseva Date: Fri, 5 Jul 2024 16:58:27 +0200 Subject: [PATCH] ref --- strainy/gfa_operations/gfa_ops.py | 48 +++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/strainy/gfa_operations/gfa_ops.py b/strainy/gfa_operations/gfa_ops.py index b823ace..f88b472 100644 --- a/strainy/gfa_operations/gfa_ops.py +++ b/strainy/gfa_operations/gfa_ops.py @@ -4,12 +4,27 @@ import re from strainy.logging import set_thread_logging - +""" +This module contains functions for operating on graphs in gfa format: +1. add_link: Adds a link between specified segments in the graph +2. add_edge: Adds an empty (no sequence) segment with the specified name and coverage to the graph +3. gfa_to_nx: Converts the graph from the gfa format to nx (networkx) format +4. from_pandas_adjacency_notinplace: Workaround for networkx.from_pandas_adjacency issue https://github.com/networkx/networkx/issues/7407 +5. 
clean_graph: Cleans the graph of self-links and adds an "A" sequence to 0-length edges +""" logger = logging.getLogger() + + + def add_link(graph, fr, fr_or, to, to_or, w): """ - Add gfa links between unitigs + Adds a link between specified segments in the graph + Parameters: + graph (gfa): graph + fr, to (string): names of segments to be linked (from and to) + fr_or, to_or (string): orientation of segments to be linked (from and to) + w: weight of the link """ #check if segments exist before connecting if graph.segment(fr) is None or graph.segment(to) is None: @@ -23,8 +38,16 @@ def add_link(graph, fr, fr_or, to, to_or, w): def add_edge(graph,edge, clN, cov): + #TODO remove edge,clN from parameters, use name instead + #TODO add sequ """ - Add gfa child edge + Adds an empty (no sequence) segment with the specified name and coverage to the graph + Parameters: + graph (gfa): graph + name (string): name of the edge to be created + cov (coverage): coverage of the edge to be created + Returns: + gfa edge """ graph.add_line("S\t%s_%s\t*" % (edge, clN)) new_line = graph.try_get_segment("%s_%s" % (edge, clN)) @@ -36,12 +59,19 @@ def add_edge(graph,edge, clN, cov): def gfa_to_nx(g): + """ + Converts the graph from the gfa format to nx (networkx) format + Parameters: + g (gfa): gfa graph + Returns: + graph (nx): nx graph + """ G = nx.Graph() for i in g.segment_names: G.add_node(i) for i in g.dovetails: G.add_edge(i.from_segment.name, i.to_segment.name) - return(G) + return G def from_pandas_adjacency_notinplace(df, create_using=None): @@ -50,8 +80,8 @@ def from_pandas_adjacency_notinplace(df, create_using=None): with the exception of 'copy=True' argument in relabel_nodes. 
This is because copy=False (default option) implies that the graph can be relabeled in place, which is not always possible + https://github.com/networkx/networkx/issues/7407 """ - try: df = df[df.index] except Exception as err: @@ -68,9 +98,11 @@ def from_pandas_adjacency_notinplace(df, create_using=None): def clean_graph(g): """ - Remove 0len unitigs, virtual and self links - :param g: - :return: + Cleans the gfa graph - deletes edges with zero length and self links + Parameters: + g (gfa): gfa graph + Returns: + graph (gfa): gfa graph """ for line in g.dovetails: if line.from_segment == line.to_segment: #TODO do not self links