Source code for ndmapper.io.mapio

# Copyright(c) 2015-2016 Association of Universities for Research in Astronomy, Inc.
# by James E.H. Turner.

import hashlib

import numpy as np

from astropy.nddata import StdDevUncertainty
from astropy.table import Table
import astropy.io.fits as pyfits

from ._util import get_backend_fn


# Public names exported by this module (the two lazy-loading proxy classes).
__all__ = ['NDMapIO', 'TabMapIO']


class NDMapIO(object):
    """
    Propagate additional information needed for `NDLater` instances to support
    lazy loading, allow saving only arrays/header attributes that have changed
    & report which FITS extensions they came from for IRAF etc.

    For lazy-loading or saving operations to succeed, the corresponding file
    must already exist. This class is intended to encapsulate bookkeeping
    within `NDLater` (managed by a `DataFile` instance) with reasonable
    overheads, rather than to provide a robust API: for the user-level
    interface, see `DataFile` instead.

    Attributes
    ----------

    filename : `str`
        The path to the file from which the data are to be mapped.

    ident : `int` or `str` or `None`
        Group identifier appropriate for the file type (int EXTVER for FITS),
        which labels this particular `NDData` instance within a `DataFile`.

    data_idx : `int`
    uncertainty_idx : `int` or `None`
    flags_idx : `int` or `None`
        The original index of each constituent data/uncertainty/flags array
        within the host file (extension number for FITS). `None` means the
        corresponding array is absent.

    """

    # Placeholders for content hashes; the hashing itself is currently
    # disabled (see the notes in load_data).
    _data_hash = None
    _uncertainty_hash = None
    _flags_hash = None

    def __init__(self, filename, ident=None, data_idx=None,
                 uncertainty_idx=None, flags_idx=None):
        """
        Record where the arrays live & look up the back-end I/O functions.

        Raises
        ------

        ValueError
            If `filename` is not a string.

        """

        # This must maintain a separate copy of the host object's filename,
        # otherwise lazy loading of data not yet in memory will fail when
        # changing the filename of a DataFile instance and trying to save it.

        # This should perhaps be changed to cache a reference to its data so
        # that one NDLater instance instantiated from another will share the
        # same data arrays independently of whether lazy loading is triggered
        # before or after instantiation. Once one of them is saved, it will
        # still get re-mapped independently.

        # `basestring` only exists in Python 2; fall back to `str` elsewhere
        # so the validation below doesn't die with a NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if not isinstance(filename, string_types):
            raise ValueError('filename must be supplied as a string')

        self.filename = filename
        self.ident = ident
        self.data_idx = data_idx
        self.uncertainty_idx = uncertainty_idx
        self.flags_idx = flags_idx

        # Back-end functions are selected according to the filename:
        self._dloader = get_backend_fn('load_array', self.filename)
        self._mloader = get_backend_fn('load_array_meta', self.filename)
        self._saver = get_backend_fn('save_array', self.filename)

        # Consider automatically determining ident from the input here
        # (ie. setting it to hdu.ver == EXTVER) if None.

    def load_data(self):
        """
        Load & return the main data array at `data_idx` from the file.
        """
        data = self._dloader(self.filename, self.data_idx)

        # A NumPy array is directly hashable -- but doing so pulls memory-
        # mapped data entirely into memory, where they stay until unloaded
        # with "del ndd.data". A workaround of reading the file twice would
        # negate the intended benefit of being able to save it intelligently,
        # so just disable hashing in the first instance and ask Erik B. about
        # it later. It might be better to determine whether the copy is dirty
        # using object ids (weakref) and memory mapping instead, like PyFITS,
        # but that might mean re-reading the file after saving, to establish
        # memory mapping before we can mark the buffer clean.
        # self._data_hash = hashlib.sha1(data).hexdigest()

        return data

    # def save_data(self, data, header, force=False):
    #     # Should hash meta-data as well here, or else we'll lose changes that
    #     # aren't associated with changes to data.
    #     newhash = hashlib.sha1(data).hexdigest()
    #     if force or newhash != self._data_hash:
    #         self._data_hash = newhash
    #         self._saver(self.filename, self.data_idx, data, header)

    def load_uncertainty(self):
        """
        Load the uncertainty array at `uncertainty_idx`, if there is one.

        Returns
        -------

        `StdDevUncertainty` or `None`
            The square root of the stored array, wrapped as a
            `StdDevUncertainty`, or `None` if `uncertainty_idx` is `None`.

        """
        # Compare with None explicitly, so that index 0 is not mistaken for
        # a missing array.
        if self.uncertainty_idx is None:
            return None

        uncert = self._dloader(self.filename, self.uncertainty_idx)

        # Presumably this kills any memory mapping? Worry about it later.
        # The sqrt is just a temporary hack until I write a Var subclass.
        # StdDevUncertainty isn't directly hashable so cast to str first
        # (also see load_data above for another reason).
        uncert = StdDevUncertainty(np.sqrt(uncert))
        # self._uncertainty_hash = hashlib.sha1(uncert).hexdigest()

        return uncert

    def load_flags(self):
        """
        Load & return the flags array at `flags_idx`, or `None` if
        `flags_idx` is `None`.
        """
        # As above, index 0 is a legitimate value; only None means "absent".
        if self.flags_idx is None:
            return None

        flags = self._dloader(self.filename, self.flags_idx)
        # self._flags_hash = hashlib.sha1(flags).hexdigest()

        return flags

    def load_meta(self):
        """
        Load & return the meta-data associated with the array at `data_idx`.
        """
        meta = self._mloader(self.filename, self.data_idx)

        # This cast to str is a little bit slow, so let's see whether the hash
        # here turns out to be premature optimization before reinstating it:
        # self._meta_hash = hashlib.sha1(str(meta)).hexdigest()

        return meta
class TabMapIO(object):
    """
    A proxy object for lazily loading/saving AstroPy Table instances. This is
    similar to `NDMapIO`, but instead of being used by an `NDData` sub-class to
    load its own attributes lazily, `TabMapIO` is used to initialize a normal
    `Table` instance on demand, since the latter doesn't have several data
    arrays to load separately and sub-classing `Table` would likely prove more
    complicated with less benefit.

    At the user level, instances are managed by, and the corresponding table
    data accessed via, `DataFile` objects. For lazy-loading or saving
    operations to succeed, the corresponding file must already exist.

    Attributes
    ----------

    filename : `str`
        The path to the file from which the data are to be mapped.

    label : `str`
        Application-specific label/name identifying the type of `Table`
        (EXTNAME for FITS). Multiple tables of the same type can be
        distinguished via the ident parameter.

    ident : `int` or `str` or `None`
        Identifier appropriate for the file type (int EXTVER for FITS), which
        distinguishes this particular instance of a given type of Table within
        the applicable DataFile.

    idx : `int`
        The original array index/number within the host file (extension number
        for FITS).

    """

    # The cached Table; None until it is loaded or assigned via `.table`.
    _table = None

    def __init__(self, filename, idx, label=None, ident=None):
        """
        Record where the table lives & look up the back-end I/O functions.

        Raises
        ------

        ValueError
            If `filename` is not a string.

        """

        # This must maintain a separate copy of the host object's filename,
        # otherwise lazy loading of data not yet in memory will fail when
        # changing the filename of a DataFile instance and trying to save it.

        # `basestring` only exists in Python 2; fall back to `str` elsewhere
        # so the validation below doesn't die with a NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if not isinstance(filename, string_types):
            raise ValueError('filename must be supplied as a string')

        self.filename = filename
        self.idx = idx
        self.label = label
        self.ident = ident

        # Back-end functions are selected according to the filename:
        self._dloader = get_backend_fn('load_table', self.filename)
        self._mloader = get_backend_fn('load_table_meta', self.filename)
        # self._saver = get_backend_fn('save_table', self.filename)

    def load_data(self):
        """
        Load & return the table data at `idx` from the file.
        """
        data = self._dloader(self.filename, self.idx)
        return data

    def load_meta(self):
        """
        Load & return the meta-data for the table at `idx` from the file.
        """
        meta = self._mloader(self.filename, self.idx)
        return meta

    def load_table(self):
        """
        (Re-)load the meta-data & data from the file and cache them as a
        `Table`, replacing any previously cached instance.
        """
        meta = self.load_meta()
        data = self.load_data()
        self._table = Table(data=data, meta=meta, copy=False)

    @property
    def table(self):
        """
        The mapped `Table`, loaded from the file on first access.
        """
        # Test identity with None rather than truthiness: an already-loaded
        # or user-assigned table that happens to be empty evaluates as false
        # and must not be silently re-read from disk & overwritten.
        if self._table is None:
            self.load_table()
        return self._table

    @table.setter
    def table(self, value):

        # Should this preserve the existing label & ident? Should it update
        # them in the new Table's meta (which would mean making a copy)?
        # EXTNAME & EXTVER should probably be removed while in memory instead.

        # Avoid converting existing Table instances to Table because that
        # converts .meta from an io.fits header to an OrderedDict, which it
        # turns out can choke on some odd values such as HISTORY.
        if not isinstance(value, Table):
            try:
                value = Table(value, copy=False)
            except ValueError:
                raise TypeError('value of .table must be convertible to Table')

        self._table = value

    def copy(self):
        """
        Generate a new instance that shares any already-loaded data but can
        be re-mapped independently.
        """
        newinst = TabMapIO(self.filename, self.idx, self.label, self.ident)
        newinst._table = self._table
        return newinst