Skip to content

Commit

Permalink
Merge pull request #718 from dhomeier/read-fileobj-safe
Browse files Browse the repository at this point in the history
Use read_fileobj_or_hdulist in all loaders and keep `gzip`, `bz2` file objects open in contextmanager
  • Loading branch information
eteq committed Jan 7, 2021
1 parent 067e309 commit cfae031
Show file tree
Hide file tree
Showing 17 changed files with 530 additions and 583 deletions.
105 changes: 43 additions & 62 deletions specutils/io/default_loaders/apogee.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@

import numpy as np

from specutils.io.registers import data_loader, custom_writer
from specutils import Spectrum1D
from ...spectra import Spectrum1D
from ..registers import data_loader
from ..parsing_utils import read_fileobj_or_hdulist

__all__ = ['apVisit_identify', 'apStar_identify', 'aspcapStar_identify',
'apVisit_loader', 'apStar_loader', 'aspcapStar_loader']
Expand All @@ -27,7 +28,7 @@ def apVisit_identify(origin, *args, **kwargs):
Check whether given file is FITS. This is used for Astropy I/O
Registry.
"""
with fits.open(args[0]) as hdulist:
with read_fileobj_or_hdulist(*args, **kwargs) as hdulist:
# Test if fits has extension of type BinTable and check for
# apVisit-specific keys
return (hdulist[0].header.get('SURVEY') == 'apogee' and
Expand All @@ -42,7 +43,7 @@ def apStar_identify(origin, *args, **kwargs):
Check whether given file is FITS. This is used for Astropy I/O
Registry.
"""
with fits.open(args[0]) as hdulist:
with read_fileobj_or_hdulist(*args, **kwargs) as hdulist:
# Test if fits has extension of type BinTable and check for
# apogee-specific keys + keys for individual apVisits
return (hdulist[0].header.get('SURVEY') == 'apogee' and
Expand All @@ -54,7 +55,7 @@ def aspcapStar_identify(origin, *args, **kwargs):
Check whether given file is FITS. This is used for Astropy I/O
Registry.
"""
with fits.open(args[0]) as hdulist:
with read_fileobj_or_hdulist(*args, **kwargs) as hdulist:
# Test if fits has extension of type BinTable and check for
# aspcapStar-specific keys
return (hdulist[0].header.get('TARG1') is not None and
Expand All @@ -80,33 +81,28 @@ def apVisit_loader(file_obj, **kwargs):
data: Spectrum1D
The spectrum that is represented by the data in this table.
"""
if isinstance(file_obj, fits.hdu.hdulist.HDUList):
hdulist = file_obj
else:
hdulist = fits.open(file_obj, **kwargs)

header = hdulist[0].header
meta = {'header': header}

# spectrum is stored in three rows (for three chips)
data = np.concatenate([hdulist[1].data[0, :],
hdulist[1].data[1, :],
hdulist[1].data[2, :]])
unit = Unit('1e-17 erg / (Angstrom cm2 s)')

stdev = np.concatenate([hdulist[2].data[0, :],
hdulist[2].data[1, :],
hdulist[2].data[2, :]])
uncertainty = StdDevUncertainty(stdev * unit)

# Dispersion is not a simple function in these files. There's a
# look-up table instead.
dispersion = np.concatenate([hdulist[4].data[0, :],
hdulist[4].data[1, :],
hdulist[4].data[2, :]])
dispersion_unit = Unit('Angstrom')
if not isinstance(file_obj, fits.hdu.hdulist.HDUList):
hdulist.close()

with read_fileobj_or_hdulist(file_obj, **kwargs) as hdulist:
header = hdulist[0].header
meta = {'header': header}

# spectrum is stored in three rows (for three chips)
data = np.concatenate([hdulist[1].data[0, :],
hdulist[1].data[1, :],
hdulist[1].data[2, :]])
unit = Unit('1e-17 erg / (Angstrom cm2 s)')

stdev = np.concatenate([hdulist[2].data[0, :],
hdulist[2].data[1, :],
hdulist[2].data[2, :]])
uncertainty = StdDevUncertainty(stdev * unit)

# Dispersion is not a simple function in these files. There's a
# look-up table instead.
dispersion = np.concatenate([hdulist[4].data[0, :],
hdulist[4].data[1, :],
hdulist[4].data[2, :]])
dispersion_unit = Unit('Angstrom')

return Spectrum1D(data=data * unit,
uncertainty=uncertainty,
Expand All @@ -130,30 +126,22 @@ def apStar_loader(file_obj, **kwargs):
data: Spectrum1D
The spectrum that is represented by the data in this table.
"""
if isinstance(file_obj, fits.hdu.hdulist.HDUList):
close_hdulist = False
hdulist = file_obj
else:
close_hdulist = True
hdulist = fits.open(file_obj, **kwargs)

header = hdulist[0].header
meta = {'header': header}
wcs = WCS(hdulist[1].header)
with read_fileobj_or_hdulist(file_obj, **kwargs) as hdulist:
header = hdulist[0].header
meta = {'header': header}
wcs = WCS(hdulist[1].header)

data = hdulist[1].data[0, :] # spectrum in the first row of the first extension
unit = Unit('1e-17 erg / (Angstrom cm2 s)')
data = hdulist[1].data[0, :] # spectrum in the first row of the first extension
unit = Unit('1e-17 erg / (Angstrom cm2 s)')

uncertainty = StdDevUncertainty(hdulist[2].data[0, :])
uncertainty = StdDevUncertainty(hdulist[2].data[0, :])

# dispersion from the WCS but convert out of logspace
# dispersion = 10**wcs.all_pix2world(np.arange(data.shape[0]), 0)[0]
dispersion = 10**wcs.all_pix2world(np.vstack((np.arange(data.shape[0]),
np.zeros((data.shape[0],)))).T,
0)[:, 0]
np.zeros((data.shape[0],)))).T, 0)[:, 0]
dispersion_unit = Unit('Angstrom')
if close_hdulist:
hdulist.close()

return Spectrum1D(data=data * unit,
uncertainty=uncertainty,
Expand All @@ -177,27 +165,20 @@ def aspcapStar_loader(file_obj, **kwargs):
data: Spectrum1D
The spectrum that is represented by the data in this table.
"""
if isinstance(file_obj, fits.hdu.hdulist.HDUList):
close_hdulist = False
hdulist = file_obj
else:
close_hdulist = True
hdulist = fits.open(file_obj, **kwargs)

header = hdulist[0].header
meta = {'header': header}
wcs = WCS(hdulist[1].header)
with read_fileobj_or_hdulist(file_obj, **kwargs) as hdulist:
header = hdulist[0].header
meta = {'header': header}
wcs = WCS(hdulist[1].header)

data = hdulist[1].data # spectrum in the first extension
unit = def_unit('arbitrary units')
data = hdulist[1].data # spectrum in the first extension
unit = def_unit('arbitrary units')

uncertainty = StdDevUncertainty(hdulist[2].data)
uncertainty = StdDevUncertainty(hdulist[2].data)

# dispersion from the WCS but convert out of logspace
dispersion = 10**wcs.all_pix2world(np.arange(data.shape[0]), 0)[0]
dispersion_unit = Unit('Angstrom')
if close_hdulist:
hdulist.close()

return Spectrum1D(data=data * unit,
uncertainty=uncertainty,
Expand Down
9 changes: 5 additions & 4 deletions specutils/io/default_loaders/generic_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,24 @@
from astropy.units import Unit
from astropy.wcs import WCS

from ..registers import data_loader
from ...spectra import Spectrum1D
from ..registers import data_loader
from ..parsing_utils import read_fileobj_or_hdulist


# Define an optional identifier. If made specific enough, this circumvents the
# need to add `format="my-format"` in the `Spectrum1D.read` call.
def identify_generic_fits(origin, *args, **kwargs):
with fits.open(args[0]) as hdulist:
with read_fileobj_or_hdulist(*args, **kwargs) as hdulist:
return (hdulist[0].header['NAXIS'] == 3)


# not yet ready because it's not generic enough and does not use column_mapping
# @data_loader("Cube", identifier=identify_generic_fits, extensions=['fits'])
def generic_fits(file_name, **kwargs):
def generic_fits(file_obj, **kwargs):
name = os.path.basename(file_name.rstrip(os.sep)).rsplit('.', 1)[0]

with fits.open(file_name, **kwargs) as hdulist:
with read_fileobj_or_hdulist(file_obj, **kwargs) as hdulist:
header = hdulist[0].header
data3 = hdulist[0].data
wcs = WCS(header)
Expand Down
36 changes: 14 additions & 22 deletions specutils/io/default_loaders/hst_cos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,17 @@
from astropy.units import Unit
from astropy.nddata import StdDevUncertainty

from specutils.io.registers import data_loader
from specutils import Spectrum1D
from ...spectra import Spectrum1D
from ..registers import data_loader
from ..parsing_utils import read_fileobj_or_hdulist

__all__ = ['cos_identify', 'cos_spectrum_loader']


def cos_identify(origin, *args, **kwargs):
"""Check whether given file contains HST/COS spectral data."""
with fits.open(args[0]) as hdulist:
if hdulist[0].header['TELESCOP'] == 'HST' and hdulist[0].header['INSTRUME'] == 'COS':
return True

return False
with read_fileobj_or_hdulist(*args, **kwargs) as hdulist:
return (hdulist[0].header['TELESCOP'] == 'HST' and hdulist[0].header['INSTRUME'] == 'COS')


@data_loader(label="HST/COS", identifier=cos_identify, extensions=['FITS', 'FIT', 'fits', 'fit'])
Expand All @@ -35,29 +33,23 @@ def cos_spectrum_loader(file_obj, **kwargs):
data: Spectrum1D
The spectrum that is represented by the data in this table.
"""
if isinstance(file_obj, fits.hdu.hdulist.HDUList):
hdulist = file_obj
else:
hdulist = fits.open(file_obj, **kwargs)

header = hdulist[0].header
name = header.get('FILENAME')
meta = {'header': header}
with read_fileobj_or_hdulist(file_obj, **kwargs) as hdulist:
header = hdulist[0].header
name = header.get('FILENAME')
meta = {'header': header}

unit = Unit("erg/cm**2 Angstrom s")
disp_unit = Unit('Angstrom')
data = hdulist[1].data['FLUX'].flatten() * unit
dispersion = hdulist[1].data['wavelength'].flatten() * disp_unit
uncertainty = StdDevUncertainty(hdulist[1].data["ERROR"].flatten() * unit)
unit = Unit("erg/cm**2 Angstrom s")
disp_unit = Unit('Angstrom')
data = hdulist[1].data['FLUX'].flatten() * unit
dispersion = hdulist[1].data['wavelength'].flatten() * disp_unit
uncertainty = StdDevUncertainty(hdulist[1].data["ERROR"].flatten() * unit)

sort_idx = dispersion.argsort()
dispersion = dispersion[sort_idx]
data = data[sort_idx]
uncertainty = uncertainty[sort_idx]

if not isinstance(file_obj, fits.hdu.hdulist.HDUList):
hdulist.close()

return Spectrum1D(flux=data,
spectral_axis=dispersion,
uncertainty=uncertainty,
Expand Down
34 changes: 14 additions & 20 deletions specutils/io/default_loaders/hst_stis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
from astropy.units import Unit
from astropy.nddata import StdDevUncertainty

from specutils.io.registers import data_loader
from specutils import Spectrum1D
from ...spectra import Spectrum1D
from ..registers import data_loader
from ..parsing_utils import read_fileobj_or_hdulist

__all__ = ['stis_identify', 'stis_spectrum_loader']


def stis_identify(origin, *args, **kwargs):
"""Check whether given file contains HST/STIS spectral data."""
with fits.open(args[0]) as hdulist:
if hdulist[0].header['TELESCOP'] == 'HST' and hdulist[0].header['INSTRUME'] == 'STIS':
return True
with read_fileobj_or_hdulist(*args, **kwargs) as hdulist:
return (hdulist[0].header['TELESCOP'] == 'HST' and hdulist[0].header['INSTRUME'] == 'STIS')

return False

Expand All @@ -35,29 +35,23 @@ def stis_spectrum_loader(file_obj, **kwargs):
data: Spectrum1D
The spectrum that is represented by the data in this table.
"""
if isinstance(file_obj, fits.hdu.hdulist.HDUList):
hdulist = file_obj
else:
hdulist = fits.open(file_obj, **kwargs)

header = hdulist[0].header
name = header.get('FILENAME')
meta = {'header': header}
with read_fileobj_or_hdulist(file_obj, **kwargs) as hdulist:
header = hdulist[0].header
name = header.get('FILENAME')
meta = {'header': header}

unit = Unit("erg/cm**2 Angstrom s")
disp_unit = Unit('Angstrom')
data = hdulist[1].data['FLUX'].flatten() * unit
dispersion = hdulist[1].data['wavelength'].flatten() * disp_unit
uncertainty = StdDevUncertainty(hdulist[1].data["ERROR"].flatten() * unit)
unit = Unit("erg/cm**2 Angstrom s")
disp_unit = Unit('Angstrom')
data = hdulist[1].data['FLUX'].flatten() * unit
dispersion = hdulist[1].data['wavelength'].flatten() * disp_unit
uncertainty = StdDevUncertainty(hdulist[1].data["ERROR"].flatten() * unit)

sort_idx = dispersion.argsort()
dispersion = dispersion[sort_idx]
data = data[sort_idx]
uncertainty = uncertainty[sort_idx]

if not isinstance(file_obj, fits.hdu.hdulist.HDUList):
hdulist.close()

return Spectrum1D(flux=data,
spectral_axis=dispersion,
uncertainty=uncertainty,
Expand Down
Loading

0 comments on commit cfae031

Please sign in to comment.