-
Notifications
You must be signed in to change notification settings - Fork 14
/
PBA_pipeline.py
90 lines (71 loc) · 3.68 KB
/
PBA_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np, sys, os, getopt
#========================================================================================#
def printHelp():
    """Print the command-line flags accepted by this script to stdout.

    Takes no arguments and returns None. Called by ``main`` when the
    command line cannot be parsed.
    """
    # A single parenthesised string prints the same text under Python 2 and
    # Python 3; the bare ``print '...'`` statement form is a SyntaxError on
    # Python 3.
    print('''Argument flags:
-X <path_to_expression_matrix> (required if no edge list is supplied; .npy or .csv)
-E <minimum_mean_expression> (default = -10000; used to filter genes)
-V <minimum_CV> (default = 0; used to filter genes)
-N <Normalize> (default = False; used to normalize expression data for knn graph)
-p <PCA dimension> (default = 50; used to compute distance matrix)
-k <number of nearest neighbors> (default = 10; used to compute edge list)
-e <path_to_edge_list> (required if no expression matrix is supplied)
-R <path_to_sources_sinks_vector> (required; .npy or .csv)
-S <path_to_lineage_specific_sink_matrix> (optional, needed to compute fate probabilities; .csv or .npy)
-D <diffusion_constant> (default = 1.0; controls the level of stochasticity in the model)\n''')
#========================================================================================#
def main(argv):
    """Run the PBA pipeline scripts in order for the given command-line flags.

    Steps, each launched as ``python <script>.py ...`` in the current
    working directory:

    1. ``compute_knn_graph.py`` (only when ``-X`` is given without ``-e``);
       the edge list is then expected at ``edge_list.csv`` next to the
       expression matrix.
    2. ``compute_Linv.py``
    3. ``compute_potential.py``
    4. ``compute_fate_probabilities.py`` (only when ``-S`` is given)
    5. ``compute_mean_first_passage_times.py`` (only when the sources/sinks
       vector has at most 1000 entries; otherwise the command is printed so
       the user can run it manually).

    Args:
        argv: list of command-line tokens without the program name
            (e.g. ``sys.argv[1:]``). See ``printHelp`` for the flags.

    Exits with status 2 (via ``sys.exit``) when the flags are malformed, a
    supplied file does not exist, neither ``-X`` nor ``-e`` is given, ``-R``
    is missing, or the ``-R`` file is neither ``.csv`` nor ``.npy``.
    """
    try:
        opts, _ = getopt.getopt(argv, 'X:E:V:p:k:e:R:S:D:N:')
    except getopt.GetoptError:
        print('\nInputs formatted incorrectly')
        printHelp()
        sys.exit(2)

    # Defaults; these match the values advertised by printHelp().
    path_to_expression_matrix = None
    minimum_mean_expression = -10000
    minimum_CV = 0
    normalize = False
    k = 10
    p = 50
    path_to_edge_list = None
    path_to_R = None
    path_to_S = None
    D = 1.0

    try:
        for o, a in opts:
            if o == '-X':
                path_to_expression_matrix = a
            elif o == '-E':
                minimum_mean_expression = float(a)
            elif o == '-V':
                minimum_CV = float(a)
            elif o == '-N':
                # Only the literal string 'True' enables normalization.
                normalize = (a == 'True')
            elif o == '-p':
                p = int(a)
            elif o == '-k':
                k = int(a)
            elif o == '-e':
                path_to_edge_list = a
            elif o == '-R':
                path_to_R = a
            elif o == '-S':
                path_to_S = a
            elif o == '-D':
                D = float(a)
    except ValueError:
        # A numeric flag received a non-numeric value.
        print('\nInputs formatted incorrectly')
        printHelp()
        sys.exit(2)

    # Validate everything up front so no pipeline step runs on bad input.
    for path in (path_to_expression_matrix, path_to_edge_list, path_to_R, path_to_S):
        if path is not None and not os.path.exists(path):
            _exit_with_error('Error: The file ' + path + ' does not exist')
    if path_to_expression_matrix is None and path_to_edge_list is None:
        _exit_with_error('Error: You must input either an expression matrix (-X) or knn edge list (-e)')
    if path_to_R is None:
        _exit_with_error('Error: You must input a sources/sinks vector (-R)')
    if '.csv' not in path_to_R and '.npy' not in path_to_R:
        _exit_with_error('Error: The file ' + path_to_R + ' must be a .csv or .npy file')

    if path_to_edge_list is None:
        _run_script('compute_knn_graph.py', [
            '-E', repr(minimum_mean_expression),
            '-V', repr(minimum_CV),
            '-k', repr(k),
            '-p', repr(p),
            '-X', path_to_expression_matrix,
            '-N', repr(normalize),
        ])
        # The edge list is picked up from the expression matrix's directory.
        path_to_edge_list = os.path.join(
            os.path.dirname(path_to_expression_matrix), 'edge_list.csv')

    _run_script('compute_Linv.py', ['-e', path_to_edge_list])
    _run_script('compute_potential.py', ['-R', path_to_R])

    if path_to_S is not None:
        _run_script('compute_fate_probabilities.py',
                    ['-S', path_to_S, '-e', path_to_edge_list, '-D', repr(D)])
    else:
        # -S is optional; without it there is nothing to pass to this step.
        print('\n## Skipping compute_fate_probabilities.py (no -S sink matrix supplied)')

    if '.csv' in path_to_R:
        R = np.loadtxt(path_to_R, delimiter=',')
    else:
        R = np.load(path_to_R)
    N_cells = len(R)

    mfpt_script = 'compute_mean_first_passage_times.py'
    mfpt_arguments = ['-R', path_to_R, '-e', path_to_edge_list, '-D', repr(D)]
    if N_cells > 1000:
        # Too slow to run automatically; show the command instead.
        print('WARNING: the script "compute_mean_first_passage_times.py" will be very slow '
              'for your dataset of ' + repr(N_cells) + ' cells. To run this script, use the following:\n')
        print(' '.join(['python', mfpt_script] + mfpt_arguments) + '\n')
    else:
        _run_script(mfpt_script, mfpt_arguments)


def _exit_with_error(message):
    """Print ``message`` and terminate the process with exit status 2."""
    print(message)
    sys.exit(2)


def _run_script(script, arguments):
    """Announce and run one pipeline script, returning its exit status.

    The command is passed as an argument list (no shell), so paths that
    contain spaces or shell metacharacters reach the script unchanged.
    A non-zero exit status is reported but does not abort the pipeline.
    """
    import subprocess  # local import keeps this block self-contained

    print('\n## Running ' + script)
    sys.stdout.flush()  # keep the banner ahead of the child's own output
    status = subprocess.call(['python', script] + list(arguments))
    if status != 0:
        print('Warning: ' + script + ' exited with status ' + repr(status))
    return status
# Script entry point: hand every command-line token after the program name
# to main() when this file is executed directly (not when it is imported).
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]
    main(cli_arguments)