This repository has been archived by the owner on Nov 22, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
206 lines (175 loc) · 8.48 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import argparse
import sys
import textwrap
import overwriters
from modes import Inspector, Mask, ROI, Spreadsheet, CVAT, Transform, Classify, Label
from pipes import Resize, Crop
from filters import PhaseFilter, FilterableFilter
from statistic import Statistic
# Registry of the available --mode values and the class that implements each.
# The key order is also the order in which argparse lists the choices.
MODES = {
    'inspector': Inspector,
    'mask': Mask,
    'roi': ROI,
    'spreadsheet': Spreadsheet,
    'cvat': CVAT,
    'transform': Transform,
    'classify': Classify,
    'label': Label,
}
# Registry of the --overwrite-label-type values and the overwriter class
# that is instantiated to parse the file given by --overwrite-label-file.
OVERWRITERS = {
    'cvat': overwriters.CVAT,
}
def _exit_with_error(message):
    """Print a CLI-style error line prefixed with this script's path, then exit with status 1."""
    print('{}: error: {}'.format(__file__, message))
    sys.exit(1)


def _is_integer(text):
    """Return True when ``int(text)`` would succeed."""
    try:
        int(text)
    except ValueError:
        return False
    return True


def _parse_new_shape(value):
    """Split a ``WxH`` string into ``[W, H]`` (still strings), or return None when not given.

    Exits with an error unless the value is exactly two integer-parsable parts
    separated by a lowercase ``x``; the caller converts both parts with ``int()``.
    """
    if value is None:
        return None
    parts = value.split('x')
    if len(parts) != 2 or not all(_is_integer(part) for part in parts):
        _exit_with_error('the following argument --new-shape requires to follow WxH format')
    return parts


def _parse_crop_rect(value):
    """Split an ``X:Y,W:H`` string into a dict with keys x, y, w, h, or return None when not given.

    Exits with an error unless the value is two comma-separated pairs, each made
    of exactly two non-empty colon-separated components.
    """
    if value is None:
        return None
    pairs = [pair.split(':') for pair in value.split(',')]
    if len(pairs) != 2 or any(len(pair) != 2 or '' in pair for pair in pairs):
        _exit_with_error('the following argument --crop-image requires to follow X:Y,W:H format')
    (x, y), (w, h) = pairs
    # Values are kept as strings, exactly as before; they are forwarded as
    # keyword arguments to Crop by the caller.
    return {'x': x, 'y': y, 'w': w, 'h': h}


def parse_arguments():
    """Parse and validate the command-line arguments.

    Besides the checks argparse performs itself, this validates the
    --new-shape and --crop-image formats and enforces that the
    --filterable-* and --overwrite-label-* options are given in consistent
    combinations. Any violation prints an error line and exits with status 1.

    Returns:
        A 12-tuple:
        (dcm_dir, label_dir, target_dir, mode, jobs, new_shape, crop_rect,
        filterable_csv_file, filterable_dataset_type, filterable_keep_issues,
        overwrite_label_type, overwrite_label_file)
        where ``jobs`` is an int, ``new_shape`` is ``[W, H]`` as strings or
        None, and ``crop_rect`` is a dict with string values under the keys
        'x', 'y', 'w', 'h' or None.
    """
    parser = argparse.ArgumentParser(
        description='This is a preprocessor to remove DICOM masks '
                    'and generate segmentations and its inspectors, masks and ROIs.',
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument('--dcm-dir',
                        required=True,
                        help='The DICOM root directory')
    parser.add_argument('--label-dir',
                        required=True,
                        help='The JSON labels root directory')
    parser.add_argument('--target-dir',
                        required=True,
                        help='The destination root directory for outputs')
    # No default here: the option is required, so a default could never apply.
    parser.add_argument('--mode',
                        required=True,
                        choices=list(MODES.keys()),
                        help=textwrap.dedent('''\
                            inspector Generate four-in-one images to compare masks, overlay
                            and noise-eliminated with original image
                            mask Generate binary masks that will be used as Dataset for segmentation models
                            roi Generate region-of-interest images that will be used
                            as Dataset for classification model
                            spreadsheet Generate CSV files that contains encrypted
                            patients identifiers and its file name
                            cvat Generate a XML file that contains segmentation mask polygons
                            to be uploaded on CVAT
                            transform Generate the original dataset images but necessarily transformed
                            classify Generate region-of-interest images that will be used
                            as Dataset for classification model, but sorts into phase labels
                            label Generate a CSV file that contains file name and its cancer phases
                            '''))
    parser.add_argument('--filterable-csv-file',
                        help='The CSV file to be used for filtering broken datasets out')
    parser.add_argument('--filterable-dataset-type',
                        choices=['train', 'valid', 'test'],
                        help='The type of dataset source directory for querying filterable CSV file')
    parser.add_argument('--filterable-keep-issues',
                        action='store_true',
                        help='A flag to keep issued rows in filterable CSV file')
    parser.add_argument('--overwrite-label-type',
                        choices=['cvat'],
                        help=textwrap.dedent('''\
                            The type of overrideable labels format to parse
                            cvat CVAT 1.1 XML annotation format
                            Pass 'annotations.xml' file to --overwrite-label-file argument
                            '''))
    parser.add_argument('--overwrite-label-file',
                        help='The label file to be used for overwriting dataset labels')
    parser.add_argument('--new-shape',
                        help='WxH. Resize the output image with desired width and height - e.g.) 224x224')
    parser.add_argument('--crop-image',
                        help='X:Y,W:H. Crop the output image to desired rectangle - e.g.) 90:0,480:480')
    # type=int lets argparse reject a non-numeric value with a usage error
    # instead of a ValueError traceback later on.
    parser.add_argument('--jobs',
                        type=int,
                        default=-1,
                        help='Number of workers')
    args = parser.parse_args()

    new_shape = _parse_new_shape(args.new_shape)
    crop_rect = _parse_crop_rect(args.crop_image)

    filterable_csv_file = args.filterable_csv_file
    filterable_dataset_type = args.filterable_dataset_type
    filterable_keep_issues = args.filterable_keep_issues
    # The CSV file and the dataset type only make sense as a pair.
    if (filterable_csv_file is None) != (filterable_dataset_type is None):
        _exit_with_error('--filterable-csv-file and --filterable-dataset-type'
                         ' arguments must be existed at the same time.')
    if filterable_keep_issues and (filterable_csv_file is None or filterable_dataset_type is None):
        _exit_with_error('--filterable-csv-file and --filterable-dataset-type'
                         ' arguments must be existed when --filterable-keep-issues exists.')

    overwrite_label_type = args.overwrite_label_type
    overwrite_label_file = args.overwrite_label_file
    # Likewise the overwrite label type and file must be given together.
    if (overwrite_label_type is None) != (overwrite_label_file is None):
        _exit_with_error('--overwrite-label-type and --overwrite-label-file'
                         ' arguments must be existed at the same time.')

    return args.dcm_dir, args.label_dir, args.target_dir, \
        args.mode, args.jobs, \
        new_shape, crop_rect, filterable_csv_file, filterable_dataset_type, filterable_keep_issues, \
        overwrite_label_type, overwrite_label_file
def main():
    """Entry point: parse the CLI arguments, run the selected mode and print its statistics.

    Builds the list of pipes (Crop, Resize) and filters (FilterableFilter,
    PhaseFilter) from the arguments, optionally parses overwriting labels,
    and then either runs the selected mode or, when the overwriter's parsed
    labels are already a Statistic, reports that Statistic directly.
    """
    print()
    (dcm_dirpath, label_dirpath, target_dirpath,
     mode_name, jobs,
     new_shape, crop_rect,
     filterable_csv_file, filterable_dataset_type, filterable_keep_issues,
     overwrite_label_type, overwrite_label_file) = parse_arguments()

    mode_type = MODES.get(mode_name)
    if mode_type is None:
        print('Unrecognizable mode argument: {}'.format(mode_name))
        return

    pipes = []
    filters = []

    # The crop pipe is placed ahead of the resize pipe in the list.
    if crop_rect is not None:
        pipes.append(Crop(**crop_rect))
    if new_shape is not None:
        # The CLI takes WxH, while Resize is given (H, W).
        height = int(new_shape[1])
        width = int(new_shape[0])
        pipes.append(Resize((height, width)))

    if filterable_csv_file is not None and filterable_dataset_type is not None:
        filterable = FilterableFilter(filterable_csv_file, filterable_dataset_type, filterable_keep_issues)
        filters.append(filterable)

    for active_pipe in pipes:
        active_pipe.inform()

    # The phase filter is always installed, after the optional filterable one.
    filters.append(PhaseFilter())

    # Resolve the overwriter class, if one was requested.
    overwriter_type = None
    if overwrite_label_type is not None and overwrite_label_file is not None:
        try:
            overwriter_type = OVERWRITERS[overwrite_label_type]
        except KeyError:
            print('Unrecognizable overwriter argument: {}'.format(overwrite_label_type))
            return

    if overwriter_type is None:
        overwritable_labels = None
    else:
        overwriter = overwriter_type()
        print('Current overwriter: {}'.format(overwriter.name))
        overwritable_labels = overwriter.parse_labels(overwrite_label_file)

    if isinstance(overwritable_labels, Statistic):
        # The parsed labels are themselves the result; no mode is run.
        statistic = overwritable_labels
    else:
        mode = mode_type(dcm_dirpath, label_dirpath, target_dirpath, pipes, filters)
        if overwritable_labels is not None:
            mode.set_overwritable_labels(overwritable_labels)
        statistic = mode.parse_and_preprocess_dirs(jobs)

    print("Job statistics:")
    for entry_name, entry_count in statistic.container.items():
        print('\t{}: {}'.format(entry_name, entry_count))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()