-
Notifications
You must be signed in to change notification settings - Fork 5
/
pdb.py
143 lines (127 loc) · 4.57 KB
/
pdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gzip
import os
import sys
try:
from urllib2 import urlopen, HTTPError # Python 2
except ImportError:
from urllib.request import urlopen # Python 3
from urllib.error import HTTPError
if __name__ == '__main__' and __package__ is None:
sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.abspath(__file__
))))
from dimple.cell import Cell
from dimple.utils import comment, put_error
class PdbMeta(Cell):
    """Cell built from the fixed-column fields of a PDB CRYST1 record.

    The six cell parameters and the symmetry string are passed on to
    Cell.__init__. The has_hetatm_x attribute starts as None; it is the
    slot in which check_hetatm_x() stores its result.
    """

    def __init__(self, cryst1_line):
        assert cryst1_line.startswith('CRYST1')
        # a, b, c occupy three 9-character fields starting at offset 6
        lengths = [float(cryst1_line[start:start+9])
                   for start in (6, 15, 24)]
        # alpha, beta, gamma occupy three 7-character fields from offset 33
        angles = [float(cryst1_line[start:start+7])
                  for start in (33, 40, 47)]
        space_group = cryst1_line[55:66].strip()
        Cell.__init__(self, tuple(lengths + angles), space_group)
        # None means "not checked yet"
        self.has_hetatm_x = None
def read_metadata(pdb, print_errors):
    """Return a PdbMeta made from the first CRYST1 line of a PDB file.

    pdb is the path of the file; a name ending with '.gz' is read through
    gzip in text mode. Lines after the first CRYST1 line are not read.

    Returns None if no line starts with CRYST1. In that case, when
    print_errors is true, the problem is reported with put_error(): either
    'empty file' or 'CRYST1 line not found'.
    """
    if pdb.endswith('.gz'):
        f = gzip.open(pdb, 'rt')
    else:
        f = open(pdb)
    meta = None
    # 'with' guarantees the file is closed even if PdbMeta() raises
    # on a malformed CRYST1 line
    with f:
        for line in f:
            if line.startswith('CRYST1'):
                meta = PdbMeta(line)
                break
        if meta is None and print_errors:
            # the whole file was scanned; position 0 means nothing was read
            if f.tell() == 0:
                put_error('empty file: %s' % pdb)
            else:
                put_error('CRYST1 line not found in %s' % pdb)
    return meta
def check_hetatm_x(filename, meta):
    """Tell if the PDB file has a HETATM line with element X.

    A line counts when it starts with HETATM and columns 77-78 are ' X'.
    A filename ending with '.gz' is read through gzip, the same way as
    read_metadata() reads it.

    meta may be None (or any false value). Otherwise its has_hetatm_x
    attribute is used as a cache: a value other than None is returned
    without reading the file, and a freshly computed result is stored there.
    """
    if meta and meta.has_hetatm_x is not None:
        return meta.has_hetatm_x
    if filename.endswith('.gz'):
        f = gzip.open(filename, 'rt')
    else:
        f = open(filename)
    with f:
        # any() stops reading at the first matching line
        has_hetatm_x = any(line[:6] == 'HETATM' and line[76:78] == ' X'
                           for line in f)
    if meta:
        meta.has_hetatm_x = has_hetatm_x
    return has_hetatm_x
def remove_hetatm(filename_in, file_out, remove_all):
    """Remove HETATM and related lines.

    Lines of the file filename_in are copied to file_out (an object with
    a write() method), except for:
    - HETATM lines: all of them if remove_all is true, otherwise only
      those with element X, which happens in many PDB entries but is not
      accepted by pointless, refmac, phaser, etc,
    - HET, HETNAM, HETSYN and FORMUL lines (always),
    - ANISOU lines of removed atoms,
    - CONECT lines that mention a removed atom.
    ANISOU and CONECT lines are matched against the atoms removed so far,
    so they are only dropped when they follow the HETATM line.

    Returns the number of distinct serial numbers of removed atoms.
    """
    # we could instead zero occupancy of the atoms and replace X with Y
    removed = set()

    def is_removed(serial):
        # short CONECT lines give empty or blank fields; these never match
        return serial and not serial.isspace() and int(serial) in removed

    with open(filename_in) as file_in:
        for line in file_in:
            record = line[:6]
            if record == 'HETATM':
                if remove_all or line[76:78] == ' X':
                    atom_serial_num = int(line[6:11])
                    removed.add(atom_serial_num)
                    continue
            # record is 6 characters wide, so HET must be space-padded
            elif record in ('HET   ', 'HETNAM', 'HETSYN', 'FORMUL'):
                continue
            elif line.startswith('ANISOU'):
                if is_removed(line[6:11]):
                    continue
            elif line.startswith('CONECT'):
                # up to five serial numbers in 5-character fields
                if any(is_removed(line[p:p+5]) for p in (6, 11, 16, 21, 26)):
                    continue
            file_out.write(line)
    return len(removed)
def is_pdb_id(a):
    """Tell if a looks like a PDB code: a digit and 3 alphanumeric chars."""
    if len(a) != 4:
        return False
    first, rest = a[0], a[1:]
    return first.isdigit() and rest.isalnum()
def download_pdb(pdb_id, output_dir):
    """Fetch a PDB entry from RCSB unless it is already in output_dir.

    The file is named <PDB_ID in upper case>.pdb. If it exists in
    output_dir it is reused without any download. Otherwise it is
    downloaded from files.rcsb.org and saved; output_dir is created
    if needed.

    Returns the path of the file. On an HTTP error, or when the file
    cannot be saved, the error is reported with put_error() and the
    program exits with status 1.
    """
    filename = pdb_id.upper()+'.pdb'
    path = os.path.join(output_dir, filename)
    if os.path.exists(path):
        comment('%s: using existing file %s\n' % (pdb_id, filename))
        return path
    comment('Downloading %s from RCSB... ' % pdb_id)
    url = 'https://files.rcsb.org/download/%s.pdb' % pdb_id.lower()
    try:
        u = urlopen(url)
    except HTTPError as e:
        put_error(str(e))
        sys.exit(1)
    # try/finally rather than 'with': the response object of Python 2
    # urllib2 is not a context manager
    try:
        content = u.read()
    finally:
        u.close()
    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        with open(path, 'wb') as f:
            f.write(content)
        comment('done.\n')
    # os.makedirs raises OSError, which is not an IOError in Python 2
    except (IOError, OSError) as e:
        put_error('Failed to save downloaded file on disk',
                  comment=str(e))
        sys.exit(1)
    return path
def main():
    """Command-line entry point.

    pdb.py nohet FILE    - write FILE without HETATM and related lines
                           to stdout
    pdb.py vol FILE      - print the value of get_volume() for FILE
    pdb.py get ID ...    - download the given PDB entries to the current
                           directory
    pdb.py FILE ...      - print the metadata read from each file

    Exits with status 1 on wrong usage, on a file without CRYST1 line
    (vol) and on an argument that is not a PDB code (get).
    """
    usage = 'Usage: pdb.py [get|vol|nohet] file1.pdb ...\n'
    args = sys.argv[1:]
    if not args:
        sys.stderr.write(usage)
        sys.exit(1)
    command = args[0]
    # these two subcommands need a file name after the command
    if command in ('nohet', 'vol') and len(args) < 2:
        sys.stderr.write(usage)
        sys.exit(1)
    if command == 'nohet':
        remove_hetatm(args[1], sys.stdout, remove_all=True)
    elif command == 'vol':
        meta = read_metadata(args[1], print_errors=True)
        if meta is None:
            # read_metadata() has already reported the reason
            sys.exit(1)
        print(meta.get_volume())
    elif command == 'get':
        for arg in args[1:]:
            if is_pdb_id(arg):
                path = download_pdb(arg, os.getcwd())
                print('-> ' + path)
            else:
                sys.stderr.write('Error: %s is not a pdb code.\n' % arg)
                sys.exit(1)
    else:
        for arg in args:
            print('File: %s' % arg)
            print(read_metadata(arg, print_errors=True))
# run the command-line interface only when executed as a script,
# not when the module is imported
if __name__ == '__main__':
    main()