Skip to content

Commit

Permalink
ref
Browse files Browse the repository at this point in the history
  • Loading branch information
katerinakazantseva committed Jul 5, 2024
1 parent 2a4ac99 commit dda681b
Showing 1 changed file with 40 additions and 8 deletions.
48 changes: 40 additions & 8 deletions strainy/gfa_operations/gfa_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,27 @@
import re
from strainy.logging import set_thread_logging


"""
This module contains functions for operating on graphs in gfa format:
1. add_link: Adds a link between specified segments in the graph
2. add_edge: Adds an empty (no sequence) segment with the specified name and coverage to the graph
3. gfa_to_nx: Converts the graph from the gfa format to nx (networkx) format
4. from_pandas_adjacency_notinplace: Workaround for networkx.from_pandas_adjacency issue https://github.com/networkx/networkx/issues/7407
5. clean_graph: Cleans the graph of self-links and adds an "A" sequence to 0-length edges
"""
logger = logging.getLogger()




def add_link(graph, fr, fr_or, to, to_or, w):
"""
Add gfa links between unitigs
Adds a link between specified segments in the graph
Parameters:
graph (gfa): graph
fr, to (string): names of segments to be linked (from and to)
fr_or, to_or (string): orientations of the segments to be linked (from and to)
w: weight of the link
"""
#check if segments exist before connecting
if graph.segment(fr) is None or graph.segment(to) is None:
Expand All @@ -23,8 +38,16 @@ def add_link(graph, fr, fr_or, to, to_or, w):


def add_edge(graph,edge, clN, cov):
#TODO remove edge,clN from parameters, use name instead
#TODO add sequ
"""
Add gfa child edge
Adds an empty(no sequence) segment with the specified name and coverage to the graph
Parameters:
graph (gfa): graph
edge, clN: the two parts of the name of the edge to be created; the new segment is named "<edge>_<clN>"
cov (coverage): coverage of the edge to be created
Returns:
gfa edge
"""
graph.add_line("S\t%s_%s\t*" % (edge, clN))
new_line = graph.try_get_segment("%s_%s" % (edge, clN))
Expand All @@ -36,12 +59,19 @@ def add_edge(graph,edge, clN, cov):


def gfa_to_nx(g):
    """
    Converts the graph from the gfa format to nx (networkx) format
    Parameters:
        g (gfa): gfa graph; its segment_names and dovetails are read
    Returns:
        graph (nx): nx.Graph with one node per segment name and one edge
        per dovetail link, connecting the names of the link's from/to
        segments (link orientation is not stored)
    """
    G = nx.Graph()
    # Add every segment as a node first so that segments without any
    # dovetail link are still present in the resulting graph.
    for segment_name in g.segment_names:
        G.add_node(segment_name)
    for link in g.dovetails:
        G.add_edge(link.from_segment.name, link.to_segment.name)
    return G


def from_pandas_adjacency_notinplace(df, create_using=None):
Expand All @@ -50,8 +80,8 @@ def from_pandas_adjacency_notinplace(df, create_using=None):
with the exception of 'copy=True' argument in relabel_nodes.
This is because copy=False (default option) implies that the graph
can be relabeled in place, which is not always possible
https://github.com/networkx/networkx/issues/7407
"""

try:
df = df[df.index]
except Exception as err:
Expand All @@ -68,9 +98,11 @@ def from_pandas_adjacency_notinplace(df, create_using=None):

def clean_graph(g):
"""
Remove 0len unitigs, virtual and self links
:param g:
:return:
Cleans the gfa graph: deletes edges with zero length and self-links
Parameters:
g (gfa): gfa graph
Returns:
graph (gfa): gfa graph
"""
for line in g.dovetails:
if line.from_segment == line.to_segment: #TODO do not self links
Expand Down

0 comments on commit dda681b

Please sign in to comment.