# process_aia.py (from a fork of oxai4science/sdoml-lite)
import argparse
import pprint
import sys
import datetime
import os
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map
from sunpy.map import Map
import skimage.transform
import numpy as np
from glob import glob


def has_nan_or_inf(data):
    # np.isinf already covers both positive and negative infinity.
    return np.isnan(data).any() or np.isinf(data).any()


def normalize(args):
    try:
        source_file, aia_cutoffs = args
        target_file = source_file.replace('_unnormalized.npy', '.npy')
        data = np.load(source_file)
        print('\nSource: {}'.format(source_file))
        fn = os.path.basename(source_file).replace('_unnormalized.npy', '')
        wavelength = int(fn.split('_')[-1])
        # Square-root scaling, clipped at the per-wavelength cutoff and
        # normalized to [0, 1].
        data = np.sqrt(data)
        c = np.sqrt(aia_cutoffs[wavelength])
        data = np.clip(data, a_min=None, a_max=c)
        data = data / c
        np.save(target_file, data)
        print('Target: {}'.format(target_file))
        # Delete the unnormalized file
        os.remove(source_file)
        print('Deleted: {}'.format(source_file))
        return True
    except Exception as e:
        print('Error: {}'.format(e))
        return False
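
# Worked example of the normalization above (the cutoff value here is a
# hypothetical placeholder, not one from this pipeline): with a cutoff of
# 2000 for some wavelength, a pixel value of 500 maps to
# sqrt(500)/sqrt(2000) = 0.5, and any value at or above the cutoff maps to
# exactly 1.0:
#
#   data = np.array([0.0, 500.0, 2000.0, 5000.0])
#   c = np.sqrt(2000.0)
#   normalized = np.clip(np.sqrt(data), None, c) / c
#   # -> [0.0, 0.5, 1.0, 1.0]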


# AIA postprocessing based on the SDOML code, with some modifications:
# https://github.com/SDOML/SDOML/blob/bea846347b2cd64d81fdcf1baf88a245a1bcb429/aia_fits_to_np.py
def process(args):
    source_file, target_file, resolution, degradations = args
    try:
        Xd = Map(source_file)
        print('\nSource: {}'.format(source_file))
    except Exception as e:
        print('Error: {}'.format(e))
        return False
    X = Xd.data
    # Make a valid mask; we'll use this to correct for downpush when
    # interpolating AIA.
    validMask = 1.0 * (X > 0)
    X[np.where(X <= 0.0)] = 0.0
    # Parse the observation date and wavelength from the file name.
    fn = os.path.basename(source_file)
    fn2 = fn.split('_')[0].replace('AIA', '')
    datestring = '%s-%s-%s' % (fn2[:4], fn2[4:6], fn2[6:8])
    wavelength = int(fn.split('_')[-1].replace('.fits', ''))
    expTime = max(Xd.meta['EXPTIME'], 1e-2)
    quality = Xd.meta['QUALITY']
    correction = degradations[wavelength][datestring]
    if quality != 0:
        print('Quality flag is not zero: {}'.format(quality))
        return False
    # Target angular size
    trgtAS = 976.0
    # Scale factor
    rad = Xd.meta['RSUN_OBS']
    scale_factor = trgtAS / rad
    # Fix the translation
    t = (X.shape[0] / 2.0) - scale_factor * (X.shape[0] / 2.0)
    # Rescale and keep the center
    XForm = skimage.transform.SimilarityTransform(scale=scale_factor, translation=(t, t))
    Xr = skimage.transform.warp(X, XForm.inverse, preserve_range=True, mode='edge',
                                output_shape=(X.shape[0], X.shape[0]))
    Xm = skimage.transform.warp(validMask, XForm.inverse, preserve_range=True, mode='edge',
                                output_shape=(X.shape[0], X.shape[0]))
    # Note: scaling leaves 10% of the image width on each side of the Sun
    # (and likewise for the image height). Measured in 512x512 images, but
    # it should be the same for 1024x1024 images.
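    # Worked example for the similarity transform above (hedged: a typical
    # observed solar radius is assumed): with RSUN_OBS around 960 arcsec,
    # scale_factor = 976/960, roughly 1.017, so the disk is slightly enlarged
    # to the fixed target angular size, and the translation t shifts the
    # scaled image so its center stays at the frame center.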
    # Correct for interpolating over valid pixels
    # Xr = np.divide(Xr, (Xm + 1e-8))
    # The mask application above, from the original SDOML code, might be bad:
    # it ends up multiplying invalid pixels (value zero in the mask) by the
    # large factor 1e+8 instead of nullifying them. Simply multiply by the
    # mask instead.
    Xr = Xr * Xm
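    # Micro-example of the issue described above: for an invalid pixel,
    # Xm is 0, so the original division Xr / (Xm + 1e-8) would scale Xr by
    # 1e+8 instead of zeroing it; multiplying by Xm maps it to 0 as intended.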
    # Correct for exposure time and apply the AIA degradation correction
    Xr = Xr / (expTime * correction)
    # Figure out the integer factor to downsample by mean
    divideFactor = int(X.shape[0] / resolution)
    Xr = skimage.transform.downscale_local_mean(Xr, (divideFactor, divideFactor))
    # Make it a sum rather than a mean by multiplying by the number of pixels
    # that were used
    Xr = Xr * divideFactor * divideFactor
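    # Example: at resolution=512 with a 1024x1024 input, divideFactor is 2,
    # so downscale_local_mean averages each 2x2 block; multiplying by
    # divideFactor**2 (= 4) turns that block mean back into a block sum,
    # preserving the total signal across the image.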
    # Cast to fp32
    Xr = Xr.astype('float32')
    Xr = np.flipud(Xr)
    if has_nan_or_inf(Xr):
        print('NaN or Inf found in the processed data')
        print('Source: {}'.format(source_file))
        print('X: {}'.format(X))
        print('Xr: {}'.format(Xr))
        print('expTime: {}'.format(expTime))
        print('correction: {}'.format(correction))
        return False
    os.makedirs(os.path.dirname(target_file), exist_ok=True)
    np.save(target_file, Xr)
    print('Target: {}'.format(target_file))
    return wavelength, Xr.min(), Xr.max()
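
# Minimal single-file usage sketch for process() (hedged: the file names and
# directories are hypothetical placeholders; only the tuple layout and return
# convention come from the code above):
#
#   degradations = load_degradations('./degradation/v9', [171])
#   result = process(('AIA20140101_0000_0171.fits',
#                     'out/AIA20140101_0000_0171_unnormalized.npy',
#                     512, degradations))
#   if result is not False:
#       wavelength, lo, hi = result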


def load_degradations(degradation_dir, wavelengths):
    def getDegrad(fn):
        # Map YYYY-MM-DD -> degradation parameter
        lines = open(fn).read().strip().split('\n')
        degrad = {}
        for l in lines:
            d, f = l.split(',')
            degrad[d[1:11]] = float(f)
        return degrad
    # Return a wavelength -> (date -> degradation parameter) dictionary
    degrads = {}
    for wl in wavelengths:
        degrads[wl] = getDegrad(os.path.join(degradation_dir, 'degrad_{}.csv'.format(wl)))
    return degrads
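
# The degradation CSVs are expected to hold one date,factor pair per line,
# with a quoted date field: the d[1:11] slice above skips the opening quote
# and keeps the YYYY-MM-DD part. A hypothetical line, with an assumed
# timestamp format and factor value:
#
#   "2014-01-01T00:00:00Z",0.9123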


def main():
    description = 'FDL-X 2024, Radiation team, SDO AIA data processor'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--source_dir', type=str, help='Source directory', required=True)
    parser.add_argument('--target_dir', type=str, help='Destination directory', required=True)
    parser.add_argument('--max_workers', type=int, default=1, help='Max workers')
    parser.add_argument('--worker_chunk_size', type=int, default=1, help='Chunk size per worker')
    parser.add_argument('--resolution', type=int, default=512, help='Pixel resolution of processed images. Should be a divisor of 1024.')
    # parser.add_argument('--wavelengths', nargs='+', type=int, default=[94, 131, 171, 193, 211, 1600, 1700], help='Wavelengths')
    parser.add_argument('--wavelengths', nargs='+', type=int, default=[131, 171, 193, 211, 1600], help='Wavelengths')
    parser.add_argument('--degradation_dir', type=str, default='./degradation/v9', help='Directory with degradation correction files')
    args = parser.parse_args()

    print(description)
    start_time = datetime.datetime.now()
    print('Start time: {}'.format(start_time))
    print('Arguments:\n{}'.format(' '.join(sys.argv[1:])))
    print('Config:')
    pprint.pprint(vars(args), depth=2, width=50)

    print('**************************')
    print('** Phase 1: Postprocessing')
    print('**************************')

    print('Loading degradations')
    degradations = load_degradations(args.degradation_dir, args.wavelengths)

    # Walk through the source directory with glob, find all .fits files, and
    # create a corresponding file name ending in .npy in the target dir,
    # keeping the directory structure.
    # Set the source and target directories, stripping a final slash if present.
    source_dir = args.source_dir.rstrip('/')
    target_dir = args.target_dir.rstrip('/')

    # Get all .fits files in the source directory
    fits_files = glob(os.path.join(source_dir, '**', '*.fits'), recursive=True)
    if len(fits_files) == 0:
        print('No files found in source directory: {}'.format(source_dir))
        return

    # Create a list of tuples with the source and target file names
    file_names = []
    for source_file in fits_files:
        target_file = source_file.replace(source_dir, target_dir).replace('.fits', '_unnormalized.npy')
        file_names.append((source_file, target_file, args.resolution, degradations))

    # Process the files
    results = process_map(process, file_names, max_workers=args.max_workers, chunksize=args.worker_chunk_size)

    files_failed = results.count(False)
    print('Files processed: {}'.format(len(results) - files_failed))
    print('Files failed   : {}'.format(files_failed))
    print('Files total    : {}'.format(len(results)))

    print('*************************')
    print('** Phase 2: Normalization')
    print('*************************')

    # Collect per-wavelength min and max values from the processing results,
    # then reduce each list to a single scalar.
    min_values = {}
    max_values = {}
    for result in results:
        if result is False:
            continue
        wavelength, min_value, max_value = result
        if wavelength not in min_values:
            min_values[wavelength] = []
            max_values[wavelength] = []
        min_values[wavelength].append(min_value)
        max_values[wavelength].append(max_value)
    for wavelength in min_values:
        min_values[wavelength] = np.array(min_values[wavelength]).min()
        max_values[wavelength] = np.array(max_values[wavelength]).max()

    file_names_normalize = []
    for _, target_file, _, _ in file_names:
        file_names_normalize.append((target_file, max_values))

    results = process_map(normalize, file_names_normalize, max_workers=args.max_workers, chunksize=args.worker_chunk_size)

    print('Normalization factors')
    print('Min values:')
    pprint.pprint(min_values)
    print('Max values:')
    pprint.pprint(max_values)

    files_failed = results.count(False)
    print('Files processed: {}'.format(len(results) - files_failed))
    print('Files failed   : {}'.format(files_failed))
    print('Files total    : {}'.format(len(results)))

    print('End time: {}'.format(datetime.datetime.now()))
    print('Duration: {}'.format(datetime.datetime.now() - start_time))


if __name__ == '__main__':
    main()
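
# Example invocation (the directory paths are placeholders; the flags are the
# ones defined in main() above):
#
#   python process_aia.py --source_dir /data/aia_fits --target_dir /data/aia_npy \
#       --max_workers 8 --worker_chunk_size 4 --resolution 512 \
#       --wavelengths 131 171 193 211 1600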