-
Notifications
You must be signed in to change notification settings - Fork 0
/
w4.py
134 lines (118 loc) · 3.12 KB
/
w4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#from __future__ import division
from w2_2 import *
from ch6 import *
import numpy
import random
from numpy.random import choice
def inputFile(file):
lines = file.readline().split()
k = int(lines[0])
t = int(lines[1])
N = int(lines[2])
dna = []
for i in range(t):
dna.append(file.readline().strip())
return (dna,k,t,N)
def MotifsFromProfileMatrix(dna, profile, k):
motifs = []
for dna_ in dna:
most_prob = ProfileMostProbable(dna_, k, profile)
motifs.append(most_prob)
return motifs
def RandomizedMotifSearch(dna, k, t):
best_motifs = []
for dna_ in dna:
random_number = random.randint(0,len(dna_) - k)
kmer = dna_[random_number: random_number + k]
best_motifs.append(kmer)
best_score = score(best_motifs)
while True:
profile = ProfileMatrixFromMotifsWithPseudocounts(best_motifs, 1)
motifs = MotifsFromProfileMatrix(dna, profile, k)
my_score = score(motifs)
if my_score < best_score:
best_motifs = motifs
best_score = my_score
else:
return best_motifs
def RandomizedMotifSearchWithIterations(dna, k, t):
best_score = 999999
best_motifs = []
iterations = 0
while True:
motifs = RandomizedMotifSearch(dna, k, t)
my_score = score(motifs)
if my_score < best_score:
best_score = my_score
best_motifs = motifs
iterations = 0
print(best_score)
else:
iterations += 1
if iterations > 500:
break
return best_motifs
def Probabilities(motifs, profile):
probs = []
for motif in motifs:
prob = 1
for j in range(len(motif)):
prob = prob*profile[motif[j]][j]
probs.append(prob)
return probs
def RandomMotif(probs, dna_string, k):
sums = float(sum(probs))
random_prob = random.random()
my_sum = 0.0
for i in range(len(probs)):
my_sum += probs[i]
if random_prob <= my_sum/sums:
return dna_string[i:i+k]
def GibbsSampler(dna, k, t, N):
motifs = []
best_motifs = []
for dna_ in dna:
random_number = random.randint(0,len(dna_) - k)
kmer = dna_[random_number: random_number + k]
motifs.append(kmer)
best_motifs = motifs
best_score = score(best_motifs)
for j in range(0, N):
i = random.randrange(t)
motifs.remove(motifs[i])
profile = ProfileMatrixFromMotifsWithPseudocounts(motifs, 1)
dnai_motifs= []
for l in range(len(dna[i]) - k + 1):
dnai_motifs.append(dna[i][l:l+k])
probs = Probabilities(dnai_motifs, profile)
my_motif = RandomMotif(probs, dna[i], k)
motifs.insert(i, my_motif)
my_score = score(motifs)
if my_score < best_score:
best_motifs = motifs
best_score = my_score
return (best_motifs, best_score)
def GibbsSamplerIterative(dna, k, t, N):
best_motifs = []
for dna_ in dna:
random_number = random.randint(0,len(dna_) - k)
kmer = dna_[random_number: random_number + k]
best_motifs.append(kmer)
best_score = score(best_motifs)
for i in range(0, 250):
(motifs, my_score) = GibbsSampler(dna, k, t, N)
if my_score < best_score:
best_motifs = motifs
best_score = my_score
return best_motifs
'''
file = inputFile(open('../Downloads/dataset_163_4.txt'))
dna = file[0]
k = file[1]
t = file[2]
N = file[3]
N = N//10
motifs = GibbsSamplerIterative(dna, k, t, N)
for motif in motifs:
print(motif)
'''