forked from asarnow/pyem
-
Notifications
You must be signed in to change notification settings - Fork 0
/
star.py
executable file
·376 lines (328 loc) · 18.7 KB
/
star.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
#!/usr/bin/env python
# Copyright (C) 2018 Daniel Asarnow
# University of California, San Francisco
#
# Simple program for parsing and altering Relion .star files.
# See help text and README file for more information.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
from six import iteritems
import glob
import json
import numpy as np
import os.path
import pandas as pd
import sys
from pyem import algo
from pyem import geom
from pyem import star
def main(args):
if args.info:
args.input.append(args.output)
df = pd.concat((star.parse_star(inp, augment=args.augment) for inp in args.input), join="inner")
dfaux = None
if args.cls is not None:
df = star.select_classes(df, args.cls)
if args.info:
if star.is_particle_star(df) and star.Relion.CLASS in df.columns:
c = df[star.Relion.CLASS].value_counts()
print("%s particles in %d classes" % ("{:,}".format(df.shape[0]), len(c)))
print(" ".join(['%d: %s (%.2f%%)' % (i, "{:,}".format(s), 100. * s / c.sum())
for i, s in iteritems(c.sort_index())]))
elif star.is_particle_star(df):
print("%s particles" % "{:,}".format(df.shape[0]))
if star.Relion.MICROGRAPH_NAME in df.columns:
mgraphcnt = df[star.Relion.MICROGRAPH_NAME].value_counts()
print("%s micrographs, %s +/- %s particles per micrograph" %
("{:,}".format(len(mgraphcnt)), "{:,.3f}".format(np.mean(mgraphcnt)),
"{:,.3f}".format(np.std(mgraphcnt))))
try:
print("%f A/px (%sX magnification)" % (star.calculate_apix(df), "{:,.0f}".format(df[star.Relion.MAGNIFICATION][0])))
except KeyError:
pass
if len(df.columns.intersection(star.Relion.ORIGINS3D)) > 0:
print("Largest shift is %f pixels" %
np.max(np.abs(df[df.columns.intersection(star.Relion.ORIGINS3D)].values)))
return 0
if args.drop_angles:
df.drop(star.Relion.ANGLES, axis=1, inplace=True, errors="ignore")
if args.drop_containing is not None:
containing_fields = [f for q in args.drop_containing for f in df.columns if q in f]
if args.invert:
containing_fields = df.columns.difference(containing_fields)
df.drop(containing_fields, axis=1, inplace=True, errors="ignore")
if args.offset_group is not None:
df[star.Relion.GROUPNUMBER] += args.offset_group
if args.restack is not None:
if not args.augment:
star.augment_star_ucsf(df, inplace=True)
star.set_original_fields(df, inplace=True)
df[star.UCSF.IMAGE_PATH] = args.restack
df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])
if args.subset is not None:
df = df.loc[df[star.Relion.RANDOMSUBSET] == args.subset]
df.reset_index(inplace=True)
if args.subsample_micrographs is not None:
if args.bootstrap is not None:
print("Only particle sampling allows bootstrapping")
return 1
mgraphs = df[star.Relion.MICROGRAPH_NAME].unique()
if args.subsample_micrographs < 1:
args.subsample_micrographs = np.int(max(np.round(args.subsample_micrographs * len(mgraphs)), 1))
else:
args.subsample_micrographs = np.int(args.subsample_micrographs)
ind = np.random.choice(len(mgraphs), size=args.subsample_micrographs, replace=False)
mask = df[star.Relion.MICROGRAPH_NAME].isin(mgraphs[ind])
if args.auxout is not None:
dfaux = df.loc[~mask]
df = df.loc[mask]
if args.subsample is not None and args.suffix == "":
if args.subsample < 1:
args.subsample = np.int(max(np.round(args.subsample * df.shape[0]), 1))
else:
args.subsample = np.int(args.subsample)
ind = np.random.choice(df.shape[0], size=args.subsample, replace=False)
mask = df.index.isin(ind)
if args.auxout is not None:
dfaux = df.loc[~mask]
df = df.loc[mask]
if args.strip_uid is not None:
df = star.strip_path_uids(df, inplace=True, count=args.strip_uid)
if args.copy_angles is not None:
angle_star = star.parse_star(args.copy_angles, augment=args.augment)
df = star.smart_merge(df, angle_star, fields=star.Relion.ANGLES, key=args.merge_key)
if args.copy_alignments is not None:
align_star = star.parse_star(args.copy_alignments, augment=args.augment)
df = star.smart_merge(df, align_star, fields=star.Relion.ALIGNMENTS, key=args.merge_key)
if args.copy_reconstruct_images is not None:
recon_star = star.parse_star(args.copy_reconstruct_images, augment=args.augment)
df[star.Relion.RECONSTRUCT_IMAGE_NAME] = recon_star[star.Relion.IMAGE_NAME]
if args.transform is not None:
if args.transform.count(",") == 2:
r = geom.euler2rot(*np.deg2rad([np.double(s) for s in args.transform.split(",")]))
else:
r = np.array(json.loads(args.transform))
df = star.transform_star(df, r, inplace=True)
if args.invert_hand:
df = star.invert_hand(df, inplace=True)
if args.copy_paths is not None:
path_star = star.parse_star(args.copy_paths)
star.set_original_fields(df, inplace=True)
df[star.Relion.IMAGE_NAME] = path_star[star.Relion.IMAGE_NAME]
if args.copy_ctf is not None:
ctf_star = pd.concat((star.parse_star(inp, augment=args.augment) for inp in glob.glob(args.copy_ctf)), join="inner")
df = star.smart_merge(df, ctf_star, star.Relion.CTF_PARAMS, key=args.merge_key)
if args.copy_optics is not None:
optics_star = pd.concat((star.parse_star(inp, augment=args.augment) for inp in glob.glob(args.copy_optics)), join="inner")
df = star.smart_merge(df, optics_star, [star.Relion.OPTICSGROUP], key=[star.UCSF.MICROGRAPH_BASENAME])
if args.copy_micrograph_coordinates is not None:
coord_star = pd.concat(
(star.parse_star(inp, augment=args.augment) for inp in glob.glob(args.copy_micrograph_coordinates)), join="inner")
df = star.smart_merge(df, coord_star, fields=star.Relion.MICROGRAPH_COORDS, key=args.merge_key)
if args.scale is not None:
star.scale_coordinates(df, args.scale, inplace=True)
star.scale_origins(df, args.scale, inplace=True)
star.scale_magnification(df, args.scale, inplace=True)
if args.scale_particles is not None:
star.scale_origins(df, args.scale_particles, inplace=True)
star.scale_magnification(df, args.scale_particles, inplace=True)
if args.scale_coordinates is not None:
star.scale_coordinates(df, args.scale_coordinates, inplace=True)
if args.scale_origins is not None:
star.scale_origins(df, args.scale_origins, inplace=True)
if args.scale_magnification is not None:
star.scale_magnification(df, args.scale_magnification, inplace=True)
if args.scale_apix is not None:
star.scale_apix(df, args.scale_apix, inplace=True)
if args.recenter:
df = star.recenter(df, inplace=True)
if args.zero_origins:
df = star.zero_origins(df, inplace=True)
if args.pick:
df.drop(df.columns.difference(star.Relion.PICK_PARAMS), axis=1, inplace=True, errors="ignore")
if args.subsample is not None and args.suffix != "":
if args.subsample < 1:
print("Specific integer sample size")
return 1
nsamplings = args.bootstrap if args.bootstrap is not None else df.shape[0] / np.int(args.subsample)
inds = np.random.choice(df.shape[0], size=(nsamplings, np.int(args.subsample)),
replace=args.bootstrap is not None)
for i, ind in enumerate(inds):
star.write_star(os.path.join(args.output, os.path.basename(args.input[0])[:-5] + args.suffix + "_%d" % (i + 1)),
df.iloc[ind])
if args.to_micrographs:
df = star.to_micrographs(df)
if args.micrograph_range:
df.set_index(star.Relion.MICROGRAPH_NAME, inplace=True)
m, n = [int(tok) for tok in args.micrograph_range.split(",")]
mg = df.index.unique().sort_values()
outside = list(range(0, m)) + list(range(n, len(mg)))
dfaux = df.loc[mg[outside]].reset_index()
df = df.loc[mg[m:n]].reset_index()
if args.micrograph_path is not None:
df = star.replace_micrograph_path(df, args.micrograph_path, inplace=True)
if args.min_separation is not None:
gb = df.groupby(star.Relion.MICROGRAPH_NAME)
dupes = []
for n, g in gb:
nb = algo.query_connected(g[star.Relion.COORDS].values - g[star.Relion.ORIGINS],
args.min_separation / star.calculate_apix(df))
dupes.extend(g.index[~np.isnan(nb)])
dfaux = df.loc[dupes]
df.drop(dupes, inplace=True)
if args.merge_source is not None:
if args.merge_fields is not None:
if "," in args.merge_fields:
args.merge_fields = args.merge_fields.split(",")
else:
args.merge_fields = [args.merge_fields]
else:
print("Merge fields must be specified using --merge-fields")
return 1
if args.merge_key is not None:
if "," in args.merge_key:
args.merge_key = args.merge_key.split(",")
if args.by_original:
args.by_original = star.original_field(args.merge_key)
else:
args.by_original = args.merge_key
merge_star = star.parse_star(args.merge_source, augment=args.augment)
df = star.smart_merge(df, merge_star, fields=args.merge_fields, key=args.merge_key, left_key=args.by_original)
if args.revert_original:
df = star.revert_original(df, inplace=True)
if args.set_optics is not None:
tok = args.set_optics.split(",")
df = star.set_optics_groups(df, sep=tok[0], idx=int(tok[1]), inplace=True)
df.dropna(axis=0, how="any", inplace=True)
if args.offset_optics is not None:
df[star.Relion.OPTICSGROUP] += args.offset_optics
if args.drop_optics_group is not None:
idx = df[star.Relion.OPTICSGROUP].isin(args.drop_optics_group)
if not np.any(idx):
idx = df[star.Relion.OPTICSGROUPNAME].isin(args.drop_optics_group)
if not np.any(idx):
print("No group found to drop")
return 1
df = df.loc[~idx]
if args.split_micrographs:
dfs = star.split_micrographs(df)
for mg in dfs:
star.write_star(os.path.join(args.output, os.path.basename(mg)[:-4]) + args.suffix, dfs[mg])
return 0
if args.auxout is not None and dfaux is not None:
if not args.relion2:
df = star.remove_deprecated_relion2(dfaux, inplace=True)
star.write_star(args.output, df, resort_records=args.sort, simplify=args.augment_output, optics=True)
else:
df = star.remove_new_relion31(dfaux, inplace=True)
star.write_star(args.output, df, resort_records=args.sort, simplify=args.augment_output, optics=False)
if args.output is not None:
if not args.relion2: # Relion 3.1 style output.
df = star.remove_deprecated_relion2(df, inplace=True)
star.write_star(args.output, df, resort_records=args.sort, simplify=args.augment_output, optics=True)
else:
df = star.remove_new_relion31(df, inplace=True)
star.write_star(args.output, df, resort_records=args.sort, simplify=args.augment_output, optics=False)
return 0
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--auxout", help="Auxilliary output .star file with deselected particles", type=str)
parser.add_argument("--noaugment", help="Always augment inputs", dest="augment", action="store_false")
parser.add_argument("--augment-output", help="Write augmented .star files with non-standard fields",
action="store_false")
parser.add_argument("--bootstrap", help="Sample with replacement when creating multiple outputs",
type=int, default=None)
parser.add_argument("--class", help="Keep this class in output, may be passed multiple times",
action="append", type=int, dest="cls")
parser.add_argument("--copy-angles",
help="Source for particle Euler angles (must align exactly with input .star file)",
type=str)
parser.add_argument("--copy-alignments", help="Source for alignment parameters (angles and shifts)")
parser.add_argument("--copy-ctf", help="Source for CTF parameters (file or quoted glob)")
parser.add_argument("--copy-optics", help="Source for optics groups")
parser.add_argument("--copy-micrograph-coordinates", help="Source for micrograph paths and particle coordinates (file or quoted glob)",
type=str)
parser.add_argument("--copy-paths", help="Source for particle paths (must align exactly with input .star file)",
type=str)
parser.add_argument("--copy-reconstruct-images", help="Source for rlnReconstructImage (must align exactly with input .star file)")
parser.add_argument("--merge-source", help="Source .star for merge")
parser.add_argument("--merge-fields", help="Field(s) to merge", metavar="f1,f2...fN", type=str)
parser.add_argument("--merge-key", help="Override merge key detection with explicit key field(s)",
metavar="f1,f2...fN", type=str)
parser.add_argument("--by-original", help="Merge using \"original\" field name in input .star", action="store_true")
parser.add_argument("--revert-original", help="Swap ImageName and ImageOriginalName before writing", action="store_true")
parser.add_argument("--drop-angles", help="Drop tilt, psi and rot angles from output",
action="store_true")
parser.add_argument("--drop-containing",
help="Drop fields containing string from output, may be passed multiple times",
action="append")
parser.add_argument("--drop-optics-group", help="Drop this optics group, may be passed multiple times", action="append")
parser.add_argument("--info", help="Print information about initial file",
action="store_true")
parser.add_argument("--invert", help="Invert field match conditions",
action="store_true")
parser.add_argument("--offset-group", help="Add fixed offset to group number",
type=int)
parser.add_argument("--restack", help="Stack path for new contiguous particle")
parser.add_argument("--pick", help="Only keep fields output by Gautomatch",
action="store_true")
parser.add_argument("--recenter", help="Subtract origin from coordinates, leaving subpixel information in origin",
action="store_true")
parser.add_argument("--zero-origins", help="Subtract origin from coordinates and set origin to zero",
action="store_true")
# parser.add_argument("--seed", help="Seed for random number generators",
# type=int)
parser.add_argument("--min-separation", help="Minimum distance in Angstroms between particle coordinates", type=float)
parser.add_argument("--scale", help="Factor to rescale particle coordinates, origins, and magnification",
type=float)
parser.add_argument("--scale-particles",
help="Factor to rescale particle origins and magnification (rebin refined particles)",
type=float)
parser.add_argument("--scale-coordinates", help="Factor to rescale particle coordinates",
type=float)
parser.add_argument("--scale-origins", help="Factor to rescale particle origins",
type=float)
parser.add_argument("--scale-magnification", help="Factor to rescale magnification (pixel size)",
type=float)
parser.add_argument("--scale-apix", help="Factor to rescale image pixel size directly (Relion 3.1+)",
type=float)
parser.add_argument("--split-micrographs", help="Write separate output file for each micrograph",
action="store_true")
parser.add_argument("--micrograph-range", help="Write micrographs with alphanumeric sort index [m, n) to output file",
metavar="m,n")
parser.add_argument("--subset", help="Select one half-set", type=int)
parser.add_argument("--subsample", help="Randomly subsample remaining particles",
type=float, metavar="N")
parser.add_argument("--subsample-micrographs", help="Randomly subsample micrographs",
type=float)
parser.add_argument("--suffix", help="Suffix for multiple output files",
type=str, default="")
parser.add_argument("--to-micrographs", help="Convert particles STAR to micrographs STAR",
action="store_true")
parser.add_argument("--micrograph-path", help="Replacement path for micrographs")
parser.add_argument("--strip-uid", help="Strip UIDs in particle and micrograph paths", nargs="?", type=int, default=None)
parser.add_argument("--set-optics", help="Determine optics groups from micrograph basename using a separator and index (e.g. _,4)", type=str)
parser.add_argument("--offset-optics", help="Offset the optics groups by N", type=int, metavar="N")
parser.add_argument("--transform",
help="Apply rotation matrix or 3x4 rotation plus translation matrix to particles (Numpy format)",
type=str)
parser.add_argument("--invert-hand", help="Alter Euler angles to invert handedness of reconstruction",
action="store_true")
parser.add_argument("--sort", help="Natsort the output file", action="store_true")
parser.add_argument("--relion2", "-r2", help="Write Relion2 compatible STAR file", action="store_true")
parser.add_argument("input", help="Input .star file(s) or unquoted glob", nargs="*")
parser.add_argument("output", help="Output .star file")
sys.exit(main(parser.parse_args()))