Source code for ndmapper.data

# Copyright(c) 2015-2016 Association of Universities for Research in Astronomy, Inc.
# by James E.H. Turner.

# Code & documentation based on NDData: Copyright (c) 2011-2015, Astropy
# Developers. Licensed under a 3-clause BSD style license - see ASTROPY.rst.

"""
A module representing files containing pixel data as high-level objects that
contain `astropy.nddata`-like instances (and vice versa).
"""

import os.path
import string
import re
from copy import copy, deepcopy

import numpy as np

#from astropy.units import Unit, Quantity
from astropy.nddata import NDDataBase, NDData, NDDataArray
# from astropy.nddata.mixins.ndarithmetic import _arit_doc
from astropy.table import Table
from astropy.utils.compat.odict import OrderedDict
# from astropy.utils import format_doc

from . import config
from . import io as ndmio
from .io import NDMapIO, TabMapIO
from .libutils import splitext, new_filename


__all__ = ['FileName', 'DataFile', 'DataFileList', 'NDLater', 'load_file_list',
           'temp_saved_datafile']

__doctest_skip__ = ['load_file_list']


[docs]class FileName(object): """ A class used by `DataFile` and, potentially, user scripts for parsing filenames into components, modifying and reconstructing them. As a mutable class, it may also be used to keep that information synchronized between interested parties. Parameters ---------- path : str or FileName, optional Single filename to parse into a FileName object representation (defaults to an empty string). sep : str or None, optional Separator for suffix components (defaults to "_"). strip : bool, optional Remove any existing prefix and suffixes from the supplied path (prior to adding any specified prefix & suffix)? prefix : str or None, optional Prefix string to add before the base filename. suffix : str or None, optional Suffix string to add after the base filename (including any initial separator). dirname : str or None, optional Directory name to add to the path (replacing any existing directory). regex : str or re or None Regular expression matching base filename (without a file extension). By default this is None, causing the value of the package configuration variable "ndmapper.config['filename_regex']" to be used, which defaults to Gemini's "S20150101S0001"-style convention (thus allowing use of other conventions without having to override the regex every time a DataFile is instantiated, as well as optionally allowing the default to be pre-compiled). Attributes ---------- dir : str Directory that the file resides in. prefix : str Sequence of characters preceding the base name. base : str Base filename in a standard format that can be recognized via the regex parameter, eg. S20150307S0001 for Gemini data. This is the original filename without the file extension, before any processing prefix/suffix are added. Not to be confused (due to lack of a clear alternative term) with the Unix "basename", which would be equivalent to root + ext, where root = prefix + base + suffix. suffix : list List of one or more suffixes following the base name, including any separator character (eg. _forStack). ext : str File extension(s), eg. "fits" or "fits.gz". sep : str or None One or more characters specified as a suffix separator. orig : str The original filename with no prefix, suffix or directory, equivalent to `base` + `ext` (read only). This is not one of the parsed components and exists for convenience in look-ups, list comprehensions etc. dotext : str The file extension as a string, beginning with the separator ('.'), unless blank. This read-only attribute is provided for convenience; the corresponding parsed filename component is `ext` (which excludes the separator and may be None, when there is no extension). root : str The root filename (prefix + base + suffix). This read-only attribute is provided for convenience in list comprehensions etc. (eg. to help determine IRAF database names) and is not one of the parsed components. """ def __init__(self, path=None, sep='_', strip=False, prefix=None, \ suffix=None, dirname=None, regex=None): # Get default regex from package configuration so non-Gemini users # don't have to specify an alternative convention every time this # class is instantiated: if regex is None: regex = config['filename_regex'] # Compile regular expression if supplied as a string: if isinstance(regex, basestring): self._re = re.compile(regex) else: self._re = regex # Record what separator we're using: self.sep = sep # If passed an existing instance, reconstruct and re-parse it, since # the regex or separator can differ (and it's simpler to do). 
Since # almost anything can be converted to a string in Python, accept only # FileName, DataFile & string types, to avoid confusion. if isinstance(path, (FileName, DataFile)): path = str(path) elif path is not None and not isinstance(path, basestring): raise ValueError('path must be a str, %s or DataFile instance' % \ str(self.__class__.__name__)) # Actually parse the path or use placeholder attributes if it's None: if path is None: # In this case we want a completely empty string to represent the # file, so that (eg.) DataFile objects instantiated with None # won't get some anomalous default filename. self.dir = '' self.prefix = '' self.base = '' self.suffix = [] self.ext = None else: # Separate directory, filename root & file extension: self.dir = os.path.dirname(path) # This splits at the first dot, unlike os.path.splitext: froot, self.ext = splitext(os.path.basename(path)) # Separate any prefix and/or suffixes from the base name: match = self._re.search(froot) if match: self.standard = True self.base = match.group() if strip: self.prefix = '' self.suffix = [] else: self.prefix = froot[:match.start()] self.suffix = self._split(froot[match.end():]) else: self.standard = False self.base = froot self.prefix = '' self.suffix = [] # Add on any specified prefix or suffix: if prefix: self.prefix = prefix + self.prefix if suffix: self.suffix.extend(self._split(suffix)) # Add or replace any initial directory name if specified: if dirname is not None: self.dir = dirname # Split a string (suffix) into a list that includes the separator # character at the start of each element that originally had one (which # the first element is not bound to): def _split(self, suff): ls = suff.split(self.sep) # always produces at least one element # If string starts with a separator, omit empty initial string # produced by the split: if not ls[0]: result = [] # Otherwise, include the beginning of the string before the first # separator as-is: else: result = [ls[0]] # Restore the separator char before any subsequent elements: sep = ' ' if self.sep is None else self.sep result += [sep + s for s in ls[1:]] return result @property def re(self): return self._re @property def dotext(self): return '' if self.ext is None else os.extsep + self.ext @property def root(self): return self.prefix + self.base + ''.join(self.suffix) # Show something meaningful when inspecting an instance: def __repr__(self): return 'FileName \'{0}\''.format(str(self)) # Reconstruct the filename when printing the instance value (after any # user modifications to the individual components): def __str__(self): return (os.path.join(self.dir, self.root + self.dotext)) @property def orig(self): return self.base + self.dotext def __deepcopy__(self, memo): # One can't copy a regex, except by re.compile(_re.pattern), but they # look to be immutable anyway so this shouldn't be a problem; likewise # for strings: return FileName(path=str(self), sep=self.sep, regex=self._re) def __eq__(self, other): return os.path.abspath(str(self)) == os.path.abspath(str(other)) def __ne__(self, other): return not self == other
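# Illustrative sketch (not part of the original module): parsing and
# reconstructing a Gemini-style filename with FileName. The path used here is
# hypothetical.
#
#   >>> fn = FileName('raw/S20150101S0001.fits', suffix='_trim')
#   >>> fn.dir, fn.base, fn.suffix, fn.ext
#   ('raw', 'S20150101S0001', ['_trim'], 'fits')
#   >>> fn.root
#   'S20150101S0001_trim'
#   >>> str(fn)
#   'raw/S20150101S0001_trim.fits'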
[docs]class DataFile(object): """ A class representing a data-file-like object, including a filename and/or a list of associated NDData instances, along with any ancillary data such as a primary header, binary tables that describe the file as a whole and aren't part of the NDData structure itself or processing history. This class can simply store a collection of associated NDData instances in memory or it can simply store a filename, eg. for use with IRAF tasks that do disk-based I/O, or it can do both, handling any loading & saving. In principle this abstraction allows mixing Python code operating on NDData fairly seamlessly with steps that do disk I/O (eg. IRAF tasks, as long as expectations regarding metadata are compatible), as well as providing a convenient file-like way of organizing an NDData collection. Parameters ---------- filename : `str` or `FileName`, optional The filename on disk of the dataset(s) to be represented. data : NDData or list of NDData or DataFile or None, optional NDData instance(s) for the dataset(s) to be represented, or an existing DataFile instance (in which case the result will be a new DataFile referring to the same NDData instance(s) as the original [this will be changed so they are separate instances referring to the same data, allowing lazy loading to be mapped differently, eg. after one file is saved, to avoid surprises]). If "filename" refers to an existing file and "data" is None, the data will be mapped lazily from that file (data=[] can be used to avoid this if the intention is to replace any existing data). Otherwise, the user-specified data will be used instead. meta : dict-like, optional A meta-data dictionary / header describing the file as a whole (rather than any individual nddata object), replacing any existing meta-data if the file already exists on disk. mode : str, optional 'read' (default), 'new', 'update' or 'overwrite' Specifies whether the file should exist on disk already and be used to initialize this DataFile (if a filename is provided) and whether it can later be written to disk. Although ndmapper does not hold file handles open with a fixed access mode, these options enforce the user's declared intention, to avoid mishaps (such as inadvertently overwriting input files or failures when working in the wrong directory). With 'read' and 'update', the specified file must already exist, with 'new', it must not exist and with 'overwrite', any existing file is ignored and will be replaced when writing to disk. The 'data' and 'filename' parameters always override whatever would otherwise be read from disk. When no `filename` is given, the default is 'new' instead of 'read'. strip : bool, optional Remove any existing prefix and suffixes from the supplied filename (prior to adding any specified prefix & suffix)? prefix : str or None, optional Prefix string to add before the base filename. suffix : str or None, optional Suffix string to add after the base filename (including any initial separator). dirname : str or None, optional Directory name to add to the filename (replacing any existing dir). labels : str or dict of str : str, optional Naming of each NDData component array in the corresponding file on disk, overriding the package default values in config['labels']. Where a string is given, it overrides only the label of the main data array (ie. config['labels']['data']). Attributes ---------- filename : FileName or None A filename-parsing object representing the path on disk. mode : str File access mode (see parameters). 
This initially reflects the corresponding parameter value / default and can change implicitly in certain circumstances when loading, saving or changing the filename. meta : dict-like The header/meta-data associated with the file as a whole (eg. the primary FITS header) rather than an individual nddata instance. cals : dict of str : DataFile A dictionary of associated processed calibrations, mapping calibration types (eg. 'bias') to DataFile instances that can be (or have been) used by the corresponding calibration steps. This attribute is currently not persistent when saving to disk, so any calibrations must be associated explicitly on reloading, if needed. The NDData instance(s) associated with the DataFile are accessed by iterating over or subscripting it like a list. """ _filename = None _meta = None _tables = None # change later to _extras? _labels = None _cals = None log = '' def __init__(self, filename=None, data=None, meta=None, mode=None, strip=False, prefix=None, suffix=None, dirname=None, labels=None): if isinstance(data, DataFile): # create new copy of existing obj # New NDLater instances & a new table proxy list allow later # re-mapping when saving without changing the original DataFile. self._data = [NDLater(data=d) for d in data] self._tables = [tp for tp in data._tables] self._meta = deepcopy(data._meta) self._filename = deepcopy(data.filename) self._labels = copy(data._labels) self._cals = data._cals elif isinstance(data, NDDataBase): self._data = [NDLater(data=data)] elif hasattr(data, '__iter__') and \ all([isinstance(d, (NDDataBase)) for d in data]): # True for [] # Sequence of NDData (or empty to ignore existing file data): self._data = [NDLater(data=d) for d in data] elif data is None: self._data = None else: raise TypeError('data parameter has an unexpected type') # Use any filename & meta-data inherited from input DataFile unless # specified: if not filename: filename = self._filename if meta is not None: self._meta = meta # Do likewise for component array labels, defaulting to the package # config values if unspecified and not copying another DataFile. Any # defaults not overridden still apply, to allow specifying a subset # without leaving things unlabelled. if not self._labels: self._labels = copy(config['labels']) if labels: if isinstance(labels, basestring): self._labels['data'] = labels elif hasattr(labels, 'keys'): self._labels.update(labels) else: raise TypeError('labels parameter has an unexpected type') # Parse any filename into a FileName object: self._filename = FileName(filename, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) # Load the file if requested and it contents haven't been overridden # and check that it exists or doesn't, to match expectations: exists = os.path.exists(str(self.filename)) read_file = False if mode is None: mode = 'read' if str(self.filename) else 'new' if mode in ['read', 'update']: read_file = True if not exists: raise IOError('%s not found' % str(self.filename)) elif mode == 'new': if exists: raise IOError('%s already exists' % str(self.filename)) elif mode != 'overwrite': raise ValueError('unrecognized file mode, \'%s\'' % mode) self._mode = mode if read_file and self.meta is None: self._load_meta() if read_file and data is None: self._load_data() # Ensure (meta-)data attributes have the right types & track how many # NDData objects this instance contains. An empty DataFile has length # zero. 
if self._data is None: self._data = [] if self._tables is None: self._tables = [] if self.meta is None: self._meta = OrderedDict() if self._cals is None: self._cals = {} self._len = len(self._data) # Has the user overridden or accessed the file contents since # instantiation (if not, we can avoid saving without checking hashes # when supplying a DataFileList to an external program such as an # IRAF task via run_task)? self._unloaded = meta is None and data is None @property def filename(self): return self._filename # Re-parse any change of filename after instantiation. To override prefixes # etc., the user can supply a FileName instance as the argument. Also # update the mode to 'new' so that previously-read-only files may now be # saved if appropriate, without inadvertently clobbering any existing copy. @filename.setter def filename(self, value): if value != self._filename: self._filename = FileName(value) self._mode = 'new' @property def mode(self): # TO DO: make this update mode first if the filename attributes have # changed, eg. by storing a string of whatever it was last, otherwise # the mode doesn't change to 'new' as when replacing the filename. return self._mode # To do: this needs a corresponding setter adding, to allow copying the # header from another instance. @property def meta(self): self._unloaded = False return self._meta @property def cals(self): return self._cals # Specify that class manages its own iteration with next() method: def __iter__(self): self._n = 0 return self # Iteration over the DataFile instance returns ~NDData:
    def next(self):
        self._n += 1
        if self._n > self._len:
            raise StopIteration
        else:
            return self._data[self._n-1]
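    # Illustrative sketch (not part of the original module): iterating over
    # and subscripting a DataFile yields its constituent NDLater instances.
    # The filename is hypothetical and assumed to exist on disk.
    #
    #   >>> df = DataFile('raw/S20150101S0001.fits')
    #   >>> idents = [ndd.ident for ndd in df]
    #   >>> shape = df[0].data.shape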
    # Allow subscripting this instance to get ~NDData:
    def __getitem__(self, key):
        return self._data[key]

    # Replacement of an existing ~NDData instance:
    def __setitem__(self, key, value):
        # Disallow non-trivial indexing when setting values, for the time
        # being, which can have strange results unless implemented carefully:
        if not isinstance(key, (int, long)):
            raise IndexError('may only assign to a single numeric index')
        self._data[key] = NDLater(data=value)

    # Deletion of an item from the DataFile:
    # This is a simplistic implementation for now; it will probably need to
    # handle some cleaning up etc.
    def __delitem__(self, key):
        del self._data[key]  # this can also be passed a slice
        self._len = len(self._data)

    # When printing the instance, show the filename:
    def __str__(self):
        return str(self.filename)

    # Need to fix this to do something sensible when there's no filename??
    # This produces an unquoted string in lists!
    def __repr__(self):
        return 'DataFile \'%s\' (len %d)' % (self.__str__(), self._len)

    # Append a new NDData instance to the DataFile:
    def append(self, item):
        # For the time being, restrict the items to append to NDData sub-
        # classes, otherwise quite strange types of arrays can result:
        if not isinstance(item, NDDataBase):
            raise ValueError('append argument should be NDData compatible')
        item = NDLater(data=item)
        # We pretty much have to recalculate the largest numeric identifier
        # here, since it would be fiddly to track when the user changes one.
        # Only auto-number new NDData if they don't already have identifiers.
        if item.ident is None:
            item.ident = self.next_ident
        self._data.append(item)
        self._len += 1
        self._unloaded = False
    # Extend the DataFile with the contents of another DataFile or list of
    # NDData-derived objects (or single object, like append). If the input is
    # another DataFile instance, any existing filename is dropped.
    def extend(self, items):
        if not isinstance(items, DataFile):
            items = DataFile(data=items)
        # Only auto-number new NDData if they don't already have identifiers.
        if all([ndd.ident is None for ndd in items]):
            for ident, item in enumerate(items, start=self.next_ident):
                item.ident = ident
        self._data += items._data
        # TO DO: re-number table idents if there's duplication between the dfs?
        # Maybe need "discard=True" option to eliminate duplicates?
        self._tables += items._tables
        self._len = len(self._data)
        self._unloaded = False
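    # Illustrative sketch (not part of the original module): building up a new
    # DataFile in memory with append()/extend(). The filename and arrays are
    # hypothetical; NDData and np are imported at the top of this module.
    #
    #   >>> df = DataFile(filename='stack.fits', mode='new')
    #   >>> df.append(NDData(np.zeros((2048, 2048))))   # wrapped as NDLater, ident -> 1
    #   >>> df.extend([NDData(np.ones((2048, 2048)))])  # idents continue from next_ident
    #   >>> len(df)
    #   2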
def __len__(self): return self._len # Lazily-load the data from file. This is not closely tied to FITS but only # flat lists of NDData objects are supported, rather than any arbitrary # hierarchy supported by, say, HDF5. def _load_data(self): data_maps, table_maps = ndmio.map_file(str(self.filename), labels=self._labels) self._data = [NDLater(iomap=iomap) for iomap in data_maps] # Table proxy objects are kept directly in DataFile, rather than used # by a sub-class as for NDData, since there is little need to load # separate parts of a Table on demand (& it could be complicated): self._tables = table_maps def _load_meta(self): self._meta = ndmio.load_common_meta(str(self.filename))
[docs] def reload(self): """ Re-load NDData instances & shared meta-data from the associated file on disk (eg. to synchronize the DataFile instance with any changes made by external programs such as IRAF). When instantiating a new DataFile object, it is unnecessary to run reload() afterwards if the associated file already exists; it will be read automatically. Note that data arrays are not actually copied into memory here; they are re-mapped and still lazily-loaded once referenced (if applicable). """ if not str(self.filename): raise IOError('Attempt to re-load DataFile object with no ' \ 'associated file') # Currently if the user overwrites data in memory by reloading the # file, that's tough luck; safeguarding volatile memory is considered # less critical than data on disk. # If the file doesn't exist etc., just pass through the IOError from # PyFITS, without masking the origin of any more obscure errors: self._load_meta() self._load_data() self._len = len(self._data) # If an unsaved file with 'new' or 'overwrite' mode is reloaded after # some external process (IRAF) creates it, change the mode to 'update', # to reflect our copy now being based on the "hard" copy, just as at # instantiation: if self.mode in ['new', 'overwrite']: self._mode = 'update' # Since everything has been re-mapped from file here, we can reset # the unloaded flag (unlike when saving, where the user may still hold # a reference to the thing that was saved): self._unloaded = True
[docs] def save(self): """ Save NDData instances & common meta-data to the associated file, creating it if it doesn't already exist. """ # Consider saving the mode in self so we can re-check here that the # file (non-)existence is still as expected. # Disallow overwriting stuff that the user didn't originally expect to. # This can be circumvented by changing the filename or casting to a # new DataFile copy with a different mode. if self.mode == 'read': raise IOError('attempted to save {0} with mode \'read\''\ .format(str(self.filename))) elif self.mode == 'new' and os.path.exists(str(self.filename)): raise IOError('file {0} with mode \'new\' would now overwrite an '\ 'existing copy'.format(str(self.filename))) # This code should be made to append None values for data groups that # have not changed since last loaded or saved, which the back-end will # then skip re-writing if it can. However, there are various ways for # this to backfire if not done very carefully (aka. premature # optimization) so this is left as an exercise for later. data_label = self._labels['data'] uncertainty_label = self._labels['uncertainty'] flags_label = self._labels['flags'] # Construct flat lists for the array, meta & (name, ident) tuple, # to pass to the save_list function. Also record the file location # index for each saved attribute, to allow remapping to the new file. # Currently it's assumed that image arrays & tables go in the same flat # list of arrays, which might be invalid for formats other than FITS. data_list, meta_list, identifiers, type_list = [], [], [], [] imapidx, tmapidx = [], [] idx = 0 # Add any tables at the beginning of the file, to keep the NDData # arrays together if more get appended later: for tproxy in self._tables: idx += 1 data_list.append(np.array(tproxy.table)) meta_list.append(tproxy.table.meta) identifiers.append((tproxy.label, tproxy.ident)) type_list.append('table') tmapidx.append(idx) # Add the NDData component arrays: for ndd in self._data: ident = ndd.ident arr_group = (ndd.data, ndd.uncertainty.array**2 if ndd.uncertainty else None, ndd.flags) meta_group = (ndd.meta, None, None) id_group = ((data_label, ident), (uncertainty_label, ident), (flags_label, ident)) type_group = ('image',) * 3 # Include list entries for the main data array and only non-empty # uncertainty/flags (passing None values to save_list for those # would cause any existing information to be preserved at the # applicable location in the file, which isn't what we want). for arr, meta, arr_id, arr_type in \ zip(arr_group, meta_group, id_group, type_group): if arr is not None or arr_id[0] == data_label: idx += 1 data_list.append(arr) meta_list.append(meta) identifiers.append(arr_id) type_list.append(arr_type) imapidx.append(idx) else: imapidx.append(None) ndmio.save_list(str(self.filename), data_list, meta_list, identifiers, type_list, self.meta) # If the save succeeded without raising an exception, remap each # Table proxy & each NDLater's _io attribute to the newly-saved file. 
for (n, tproxy), idx in zip(enumerate(self._tables), tmapidx): self._tables[n] = TabMapIO(str(self.filename), idx=idx, label=tproxy.label, ident=tproxy.ident) for ndd, data_idx, uncertainty_idx, flags_idx in \ zip(self._data, *[iter(imapidx)]*3): # Initialize a new _io instance in case it doesn't exist already: ndd._io = NDMapIO(str(self.filename), ident=ndd.ident, data_idx=data_idx, uncertainty_idx=uncertainty_idx, flags_idx=flags_idx) # If the file mode was 'new', it needs changing to 'update' now it # has been saved, to allow saving further changes; likewise for # 'overwrite', to reflect any subsequent saves being based on the # existing file: if self.mode in ['new', 'overwrite']: self._mode = 'update'
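    # Illustrative sketch (not part of the original module): a typical save
    # workflow. The filename is hypothetical and 'arr' stands for a NumPy
    # array defined elsewhere.
    #
    #   >>> df = DataFile(filename='S20150101S0001_proc.fits',
    #   ...               data=[NDData(arr)], mode='new')
    #   >>> df.save()   # writes the file; mode becomes 'update'
    #   >>> df.save()   # later saves now update the existing copy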
@property def unloaded(self): # To qualify as unloaded, the DataFile itself (ie. meta) has not to # have been touched, nor each constituent NDLater instance nor the # filename (WRT what NDLater is lazy-loading): return self._unloaded and all([ndd.unloaded and \ ndd._io.filename == self.filename for ndd in self]) @property def next_ident(self): """ Return the next integer identifier greater than any existing integer identifier(s) or None if one or more existing identifier is undefined. """ idents = [as_int_or_none(ndd.ident) for ndd in self] if not idents: ident = 1 elif any([val is None for val in idents]): ident = None # don't try to set ids if they're incomplete anyway else: ident = max(idents) ident = ident + 1 if ident else 1 # don't rely on False==0 FWIW return ident
    def renumber(self, idents=None):
        """
        Re-number/name the (ident attributes of) member NDData instances,
        either with user-supplied values or sequentially.

        Parameters
        ----------
        idents : list of int, optional
            The identifiers to use, if not numbering sequentially from 1.
            The intention is also to allow a list of str later (see NDLater).
        """
        if idents:
            if not isinstance(idents, list) or len(idents) != len(self):
                raise ValueError('idents must be a list matching the '\
                                 'DataFile length')
            pairs = zip(idents, self)
        else:
            pairs = enumerate(self, start=1)

        for ident, ndd in pairs:
            ndd.ident = ident

        self._unloaded = False
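    # Illustrative sketch (not part of the original module): renumbering the
    # ident attributes of the member NDData instances.
    #
    #   >>> df.renumber()                   # sequential idents 1..len(df)
    #   >>> df.renumber(idents=[3, 1, 2])   # or explicit values (length must match)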
def _arith(self, operand, operation, filename=None, **kwargs): # Distinguish unary & binary operators by presence/absence of operand: if operand is None: operand = [] binary = False else: binary = True # If the second operand isn't already a DataFile, try to converting it # to one via NDData, so we can iterate over things uniformly below: if not isinstance(operand, DataFile): operand = self.__class__(data=NDLater(data=operand)) len_data = len(self) len_operand = len(operand) # Make sure the operand lengths match: if len_operand == 1: operand_data = len_data * operand._data elif len_operand == len_data: operand_data = operand._data else: raise ValueError('operands have unmatched lengths') # Create an unnamed output DataFile based on this one (first operand): outdf = self.__class__(filename=filename, meta=deepcopy(self.meta), mode='new', labels=copy(self._labels)) outdf._tables = [tp for tp in self._tables] outdf._cals = self._cals # Should this be a copy (also in __init__)? # Combine each pair of NDData instances in the 2 lists (files) using # the first instance's own operator: for op1, op2 in zip(self, operand_data): op_fn = getattr(op1, operation) if binary: result = op_fn(op2, **kwargs) else: result = op_fn(**kwargs) outdf.append(result) return outdf
    def add(self, operand, **kwargs):
        return self._arith(operand, 'add', **kwargs)

    def __add__(self, operand):
        return self.add(operand)

    def subtract(self, operand, **kwargs):
        return self._arith(operand, 'subtract', **kwargs)

    def __sub__(self, operand):
        return self.subtract(operand)

    def multiply(self, operand, **kwargs):
        return self._arith(operand, 'multiply', **kwargs)

    def __mul__(self, operand):
        return self.multiply(operand)

    def divide(self, operand, **kwargs):
        return self._arith(operand, 'divide', **kwargs)

    def __div__(self, operand):
        return self.divide(operand)

    def __truediv__(self, operand):
        return self.divide(operand)

    def bitwise_or(self, operand, **kwargs):
        return self._arith(operand, 'bitwise_or', **kwargs)

    def __or__(self, operand):
        return self.bitwise_or(operand)

    def bitwise_and(self, operand, **kwargs):
        return self._arith(operand, 'bitwise_and', **kwargs)

    def __and__(self, operand):
        return self.bitwise_and(operand)

    def bitwise_xor(self, operand, **kwargs):
        return self._arith(operand, 'bitwise_xor', **kwargs)

    def __xor__(self, operand):
        return self.bitwise_xor(operand)

    def invert(self, **kwargs):
        return self._arith(None, 'invert', **kwargs)

    def __invert__(self):
        return self.invert()
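# Illustrative sketch (not part of the original module): arithmetic on
# DataFile instances operates on corresponding pairs of NDData and returns a
# new, unnamed DataFile; df1 & df2 are hypothetical, equal-length inputs.
#
#   >>> diff = df1.subtract(df2)   # or equivalently: df1 - df2
#   >>> flat = df1.divide(df2)
#   >>> ormask = df1 | df2         # bit-wise OR of integer data arrays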
def as_int_or_none(val):
    """
    Convert int or str(int) to an integer, preserving None values and
    returning False for other types.
    """
    if val is None or isinstance(val, (int, long)):
        result = val
    elif isinstance(val, basestring):
        try:
            result = int(val)
        except (ValueError, TypeError):
            result = False
    else:
        result = False
    return result
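# Illustrative behaviour (not part of the original module):
#
#   >>> as_int_or_none('3'), as_int_or_none(2), as_int_or_none(None), as_int_or_none(3.5)
#   (3, 2, None, False)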
[docs]class DataFileList(list): """ A class that holds a list of DataFile objects, tracking filenames and/or NDData collections with ancillary information. This implementation is pretty much a normal Python list but provides a more convenient interface for instantiating multiple DataFile objects from a list of filenames or nddata instances/lists. Parameters ---------- filenames : `str` or `list` of `str`, optional The filename(s) on disk of the dataset(s) to be represented. There should either be as many filenames as data or None (but lists of NDData can be nested to associate subsets with fewer filenames). data : `DataFile` or `DataFileList` or (nested list of) `NDData`, optional NDData/DataFile instance(s) for the dataset(s) to be represented (or an existing DataFileList instance). Any member DataFile instances will become new copies if any of the filename-modifying parameters are set, otherwise the DataFileList will simply hold references to the original instances (allowing manipulation of existing DataFiles via new lists). meta : `dict`-like or `list` of `dict`-like or `None`, optional The header/meta-data associated with each file as a whole (eg. the primary FITS header) rather than with individual nddata instances. There should be one instance per file or None (which preserves any information from an existing file). mode : `str`, optional 'read' (default), 'new', 'update' or 'overwrite' Specifies whether the file should exist on disk already and be used to initialize this DataFile (if a filename is provided) and whether it can later be written to disk (also see DataFile). With 'read' and 'update', the specified file must already exist, with 'new', it must not exist and with 'overwrite', any existing file is ignored and will be replaced when writing to disk. The 'data' and 'filename' parameters always override whatever would otherwise be read from disk. If mode is None, it will be taken from any DataFile instances given as data, as long as they are all the same (raising an exception if not), defaulting to 'read' if no DataFile instances are provided. strip : `bool` Remove any existing prefix and suffixes from the supplied filename (prior to adding any specified prefix & suffix)? prefix : `str`, None Prefix string to add before the base filename. suffix : `str`, None Suffix string to add after the base filename (including any initial separator). dirname : `str`, None Directory name to add to the filename (replacing any existing dir). 
""" def __init__(self, filenames=None, data=None, meta=None, mode=None, strip=False, prefix=None, suffix=None, dirname=None): # Check our args & if needed expand them out to match: filenames, data, meta, mode = self._expand_args(filenames=filenames, data=data, meta=meta, mode=mode, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) # If no mods are specified to the input DataFiles, use them directly: if filenames is None: # means _expand_args just passed through data initlist = data # Otherwise, cast each data input to new DataFile with requested mods: else: initlist = [DataFile(filename=fn, data=obj, meta=mdict, mode=mode, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) for obj, mdict, fn in \ zip(data, meta, filenames)] # Do whatever initialization a list object normally does: list.__init__(self, initlist) # Record the specified mode, to apply to any DataFiles added later: self._mode = mode def _expand_args(self, filenames, data, meta, mode, strip, prefix, suffix, dirname): # First convert filenames/data/meta to lists (if they aren't already) # or None. Although instantiating a list of things, single objects are # accepted for convenience and are expanded to match the lengths of # the other arguments if needed (except filenames). There isn't really # a simple way of doing this with a function because for some arguments # we want to distinguish specific list-like objects from a list of # those objects, for others, we want to distinguish duck-typed sequence # objects from a list etc. -- ie. the criteria vary. Also, it seems # safer to require specific input types in some cases and warn the user # if something unexpected is received. Handling all this is a little # fiddly but should make things conceptually simpler and/or more # readable from a user perspective. # Data can be a singly-nested list, with one sub-list per DataFile. # These cases are enumerated to distinguish them from container lists. if _compatible_data_obj(data) or data == []: data = [data] elif data is not None and not hasattr(data, '__iter__'): # If the constituent elements of the list don't have the right # type, let DataFile complain rather than checking here. raise TypeError('data parameter has an unexpected type') # How many data items do we have (None or 1 or length of list >=0)? len_data = seqlen(data) if isinstance(filenames, basestring): filenames = [filenames] elif filenames is not None and not hasattr(filenames, '__iter__'): raise TypeError('filenames parameter has an unexpected type') len_fn = seqlen(filenames) if hasattr(meta, 'keys'): # dict-like (inc. PyFITS headers) meta = [meta] elif meta is not None and not hasattr(meta, '__iter__'): raise TypeError('meta parameter has an unexpected type') len_meta = seqlen(meta) # If the mode is undefined, use any existing DataFile modes if they're # all the same, otherwise default to 'read' as for DataFile. 
if mode is None: if isinstance(data, list): dfmodes = list(set([item.mode for item in data \ if isinstance(item, DataFile)])) nmodes = len(dfmodes) if nmodes == 0: mode = 'read' elif nmodes == 1: mode = dfmodes[0] else: raise ValueError('must specify DataFileList mode when ' \ 'provided mixed DataFile modes') else: mode = 'read' # Determine whether the filename is being modified, to help decide # below whether a new copy of the input is needed: fn_modified = filenames or strip or prefix or suffix \ or dirname is not None # If there are any modifiers to the input filename, meta or mode or # the data aren't already DataFiles, create a new copy of each DataFile # to hold those modifications without affecting existing objects # (otherwise, use what we were given directly, to allow making new # lists of existing DataFile instances): if not (isinstance(data, list) and meta is None and not fn_modified \ and all([isinstance(item, DataFile) and item.mode == mode \ for item in data])): # Here we should have an existing DataFileList instance, a list of # DataFile objects, list of NDData or list of lists of NDData: # Expand out data, filenames & meta to lists of the same length, # to avoid repetition below. lens = tuple(l for l in (len_fn, len_data, len_meta) \ if l is not None) listlen = max(lens) if lens else None if listlen is None: filenames, data, meta = [], [], [] else: listrange = range(listlen) # In the special case of filenames, we don't expand out a # single value to match the other lists, as any filenames are # expected to be unique within any given list: filenames = filenames if filenames else \ [None for item in listrange] data = data if (len_data and len_data > 1) \ else [data[0] for item in listrange] if data \ else [data for item in listrange] # This last line can produce [None] or [[]], the latter # of which overrides any existing data in the DataFile. meta = meta if (len_meta and len_meta > 1) \ else [meta[0] for item in listrange] if meta \ else [meta for item in listrange] if not len(filenames) == len(data) == len(meta): raise ValueError('filenames, data & meta args are unmatched ' \ 'in length') return filenames, data, meta, mode # Ensure the data to be added have a compatible mode. These rules might # still need a bit of tweaking but should be reasonable. def _check_mode(self, filename, data, strip, prefix, suffix, dirname): is_datafile = isinstance(data, DataFile) orig_mode = data.mode if is_datafile else self._mode if orig_mode == self._mode: return True # Disallow appending as-yet-unwritten files when the files in the list # are also expected to be on disk (eg. by IRAF): if self._mode in ['read', 'update'] and \ orig_mode in ['new', 'overwrite']: raise ValueError('can\'t add unsaved file to DataFileList '\ 'whose mode=\'{0}\''.format(self._mode)) # When listing 'new' files, figure out the filename that would be added # and ensure it doesn't already exist on disk (simply instantiating # DataFile would produce a less clear error for this scenario). 
elif self._mode == 'new': fn = FileName(filename if filename else data.filename \ if is_datafile else None, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) if os.path.exists(str(fn)): raise ValueError('can\'t add already-existing file(s) to '\ 'DataFileList whose mode=\'new\'') # Disallow overwriting files previously declared read-only by # accidentally appending them to a writeable list: elif orig_mode == 'read': # list mode 'update'/'overwrite' raise ValueError('can\'t add read-only data to DataFileList '\ 'whose mode=\'{0}\''.format(self._mode)) return False # Wrap the normal list append:
[docs] def append(self, data=None, meta=None, filename=None, strip=False, \ prefix=None, suffix=None, dirname=None): # If the filename of the DataSet being appended is unmodified and its # existing mode is the same as the list's, we append that instance to # allow keeping the same DataFiles in multiple lists, otherwise we cast # to a new instance with the appropriate attributes modified. All # DataFiles inherit the file mode of the DataFileList. To avoid # surprises, the modes of existing DataFiles can only be overridden # arbitrarily at instantiation, when done explicitly, whereas here # certain combinations are disallowed. same_mode = self._check_mode(filename=filename, data=data, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) if isinstance(data, DataFile) and same_mode and not (meta or filename \ or strip or prefix or suffix or dirname is not None): newdf = data else: newdf = DataFile(filename=filename, data=data, meta=meta, mode=self._mode, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) list.append(self, newdf)
[docs] def extend(self, data=None, meta=None, filenames=None, strip=False, \ prefix=None, suffix=None, dirname=None): # Check & if needed expand the argument lists, as in init. filenames, data, meta, mode = self._expand_args(filenames=filenames, data=data, meta=meta, mode=self._mode, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) # If no mods are specified to the input DataFiles, use them directly: if filenames is None: initlist = data # Otherwise, cast each data input to new DataFile with requested mods: else: # This is a bit of a hack to run _check_mode as part of a list # comprehension; we only really care whether it raises an exception # but comparing its return value with None (which never happens) # conforms with the applicable syntax: initlist = [DataFile(filename=fn, data=obj, meta=mdict, mode=self._mode, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) for obj, mdict, fn in \ zip(data, meta, filenames) \ if self._check_mode(filename=fn, data=obj, strip=strip, prefix=prefix, suffix=suffix, dirname=dirname) is not None] # Call the usual list.extend() method to finish the job: list.extend(self, initlist)
    def reload(self):
        """
        Reload each constituent DataFile (eg. after the file is modified by
        an IRAF task).
        """
        for df in self:
            df.reload()
        self._update_mode()

    def save(self):
        """
        Save each constituent DataFile to its pre-defined filename.
        """
        for df in self:
            df.save()
        self._update_mode()

    # Re-calculate the DataFileList mode from its constituent files after
    # loading or saving them:
    def _update_mode(self):
        dfmodes = set([df.mode for df in self])
        self._mode = dfmodes.pop() if len(dfmodes) == 1 else 'update'
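# Illustrative sketch (not part of the original module): creating a list of
# existing files and deriving a matching list of new output files from it.
# The filenames are hypothetical (the raw inputs are assumed to exist and the
# outputs not to).
#
#   >>> raw = DataFileList(['S20150101S0001.fits', 'S20150101S0002.fits'],
#   ...                    dirname='raw')
#   >>> outputs = DataFileList(data=raw, suffix='_proc', mode='new')
#   >>> outputs.save()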
def seqlen(arg, convert_empty=False): """ Return the length of the argument if a sequence, otherwise 1 or None. """ if hasattr(arg, '__iter__'): # no strings etc. try: slen = len(arg) except TypeError: raise TypeError('seqlen: unexpectedly got an iterable object ' \ 'with no length!') if convert_empty is True and slen == 0: slen = None elif arg is None: slen = None else: slen = 1 return slen def _compatible_data_obj(arg): # For now DataFile doesn't support ndarray but that's OK, as it reports # the same error DataFileList would for unsupported types and this will # work if and when it's added there: if isinstance(arg, DataFile) or isinstance(arg, NDDataBase) or \ isinstance(arg, np.ndarray): return True else: return False
[docs]class NDLater(NDDataArray): """ A compatible variant of NDDataArray that facilitates lazy loading of pixel data, allowing code to work freely with NDData-like objects (including just doing bookeeping with the headers) without using more memory than necessary. The main API difference from NDDataArray is that NDLater is initialized with a file name & indices (eg. FITS extension numbers) for getting/ saving the data on demand. This very simple interface does not include any safety checks and it's the caller's responsibility to take care of managing the file structure, existence etc., which can be taken care of by the higher-level DataFile class. A subset of NDData.__init__() parameters are also accepted, allowing creation from an existing NDData instance or ndarrays (eg. for writing to a new file); this will override any existing data in the file at the specified indices. Any mask, wcs & unit values will be derived (once implemented) directly from the input meta-data, rather than specified here, and can then be overridden after instantiation if necessary. Parameters ---------- data : `~numpy.ndarray` or `NDData`, optional. The main data array contained in this object, overriding any existing data in the mapped file, if applicable. If the intention is to use a new copy of the input object rather than a reference to it, the user should make that copy beforehand. uncertainty : `~astropy.nddata.NDUncertainty`, optional Uncertainties on the data. mask : `~numpy.ndarray`-like, optional Mask for the data, given as a boolean Numpy array or any object that can be converted to a boolean Numpy array with a shape matching that of the data. The values must be ``False`` where the data is *valid* and ``True`` when it is not (like Numpy masked arrays). If ``data`` is a numpy masked array, providing ``mask`` here will causes the mask from the masked array to be ignored. .. warning:: This option is provided for compatibility with `NDData` but the `mask` attribute is currently unused in NDMapper (though it may still be manipulated by user code); information about bad pixels is instead propagated in `flags`. This may change in future. flags : `~numpy.ndarray`-like or `~astropy.nddata.FlagCollection`, optional Flags giving information about each pixel. These can be specified either as a Numpy array of any type (or an object which can be converted to a Numpy array) with a shape matching that of the data, or as a `~astropy.nddata.FlagCollection` instance which has a shape matching that of the data. wcs : undefined, optional WCS-object containing the world coordinate system for the data. .. warning:: This is not yet defind because the discussion of how best to represent this class's WCS system generically is still under consideration. For now just leave it as None. meta : `dict`-like object, optional Metadata for this object. "Metadata" here means all information that is included with this object but not part of any other attribute of this particular object. e.g., creation date, unique identifier, simulation parameters, exposure time, telescope name, etc. unit : `~astropy.units.UnitBase` instance or str, optional The units of the data. ident : `int` (`str` to be supported later), optional File-independent identifier for this NDLater instance (eg. MOS slit number, CCD/amplifier number or object name). This is used to determine the correspondence of NDLater instances across multiple DataFile objects. 
[The use of string identifiers would currently cause incompatibility with IRAF run_task, until more bookkeeping is added to map them to numeric EXTVERs for the FITS kernel; also, the back-end loader/saver would also need modifications to handle them.] iomap : `NDMapIO`, optional An object that maps the data, uncertainty & flags attributes to a file name and indices within that file, enabling those attribute values to be lazily loaded. Internally, this iomap remains pointing to the existing file location with which the NDLater instance was last synchronized (loaded/saved), independently of whether the host DataFile (if any) changes, ensuring that the expected data are loaded on demand (as long as the original file still exists). The iomap.ident attribute is therefore independent of NDLater.ident, to allow renaming/numbering. Attributes ---------- ident : `int` (`str` to be supported later) As described above. When the NDLater instance is read from a FITS file via DataFile, this value defaults to EXTVER (unless the file was saved from a previous instance with the value overridden). This attribute is an API short cut to meta['NDM_ID'], where the value persists. (See NDDataArray doc string for other methods & attributes. This is a Work in progress, to support DataFile.) """ _id_key = 'NDM_ID' _arith_defaults = {'propagate_uncertainties' : True, 'handle_mask' : np.bitwise_or, 'handle_meta' : 'first_found' } # After implementing lazy loading with NDLater instead of loading # everything in _load_nddata_from_FITS, 11 tests now take ~2.3s total to # run instead of ~1.2s (I think with 2 tests that read 2 SCI exts each). # This is based on the NDData & NDDataArray __init__ but avoids referencing # array attributes here, instead storing an obj that knows how to get them. def __init__(self, data=None, uncertainty=None, mask=None, wcs=None, flags=None, meta=None, unit=None, ident=None, iomap=None): if iomap and not isinstance(iomap, NDMapIO): raise TypeError('iomap must be an NDMapIO instance') if data is None and iomap is None: raise ValueError('must provide either data or iomap') # Remember our "parent class", for later use in getters/setters, where # to be on the safe side, we invoke the NDDataArray getter/setter logic # after actually loading the data array(s). self._parent = super(NDLater, self) # Add support for deriving these properly later, when needed. The # _unit must be set before the parent __init__ is called below (which # looks like a bug in NDDataArray), as the latter calls the uncertainty # setter, which uses the unit getter. self._mask = None self._wcs = wcs self._unit = None # Initializing the data (& uncertainty/flags) to None indicates that # the data haven't been loaded yet: self._data = None # Attach the object to which lazy loading is delegated (and which # tracks the mapping of attributes to extensions). This must be done # before setting some of the other attributes, whose setters need to # access the data (which may or may not be what we want for lazy # loading but is what currently happens in nddata). self._io = iomap if data is None: # When starting from scratch, the only initialization we can # inherit from our ancestors is the most basic stuff that happens # in the NDDataBase class (which doesn't do much at present but # just in case it does later...): super(NDData, self).__init__() # Normally self._data, self._uncertainty are set by NDData and # self._flags by NDDataArray. They are only used by the relevant # attribute getters & setters upstream. 
self._uncertainty = uncertainty self._flags = flags # If this remains undefined, we'll load it from file below: self._meta = meta # Initialize attributes via the upstream setter logic of the public # API where possible (wcs doesn't have one yet). Some of these # setters expect the private attribute version to be defined first: self.mask = mask self.unit = unit else: # If instantiating from NDLater, copy its iomap (unless given one). # This can be a copy by reference, as it gets replaced when saving # and re-mapping. if self._io is None and hasattr(data, '_io'): self._io = data._io # Instantiating one NDLater from another probably triggers lazy # loading inadvertently in the parent __init__ below, because the # uncertainty setter causes a comparison with data.shape. Need to # do something about this. # When passed data as well as a filename, let our parent class # populate the class attributes and then we'll lazily load anything # that wasn't provided. It's the caller's responsibility to avoid # overriding inconsistent subsets of what's already in the file # (eg. data without the corresponding uncertainty) but the DataFile # class can help take care of that. self._parent.__init__(data, uncertainty=uncertainty, mask=mask, flags=flags, wcs=wcs, meta=meta, unit=unit) # NDDataArray doesn't copy flags from data when it should. We can't # duck type this because numpy also has a different "flags". if flags is None and isinstance(data, NDDataArray): self._flags = data.flags # Don't bother loading the header lazily, but still get it via NDMapIO, # to avoid adding I/O logic in more places than necessary. We also need # the meta-data here, to determine things like ident, units & WCS. if self._meta is None: if self._io: self._meta = self._io.load_meta() else: self._meta = OrderedDict() # Because NDDataArray's mask & flags setters refer to data.shape, # they will unintentionally trigger lazy loading if we're provided # explicitly with a flags argument. Consider overriding the shape # getter (etc.) to avoid this, using the metadata directly (with the # help of the io library?) or checking for _data==None and doing # del _data afterwards if necessary as a temporary workaround. # These setters only work after creating the _io attribute above. self.uncertainty = self._uncertainty self.flags = self._flags # Where the file is mapped from disk but data & meta aren't loaded/ # accessed yet, this flag remains false and we can avoid having to save # again prior to running some external program (ie. IRAF/run_task) on a # DataFileList, without first checking hashes. This is a one-time # optimization, as we don't know later (even after saving) whether the # user/app could modify a prior ref to the data, but it supports the # common case where DataFileList is only used to list input filenames. self._unloaded = data is None and uncertainty is None and \ flags is None and meta is None # Set or override identifier in order of precedence: 1. user parameter, # 2. value persisted in meta-data, 3. format-native iomap value. 
# Changing the value read from disk resets the above unloaded flag: if self.ident is None and self._io: self._meta[self._id_key] = self._io.ident if ident is not None: self.ident = ident @property def data(self): if self._data is None and self._io: self._data = self._io.load_data() self._unloaded = False return self._data @data.setter def data(self, value): self._data = value @data.deleter def data(self): # This doesn't always free memory in practice, probably due to PyFITS's # default memory mapping, but either way it provides the indended means # of dropping our reference to the data. self._data = None @property def uncertainty(self): # This prevents resetting the value to None; use del instead if self._parent.uncertainty is None and self._io: self.uncertainty = self._io.load_uncertainty() self._unloaded = False return self._parent.uncertainty # Parent class setters seem not to get called automatically once a getter # is defined but unfortunately super() simply doesn't work as a proxy for # setters (Python issue 14965) so we have to do it the following way: @uncertainty.setter def uncertainty(self, value): NDDataArray.uncertainty.fset(self, value) @uncertainty.deleter def uncertainty(self): self.uncertainty = None @property def flags(self): # This prevents resetting the value to None; use del instead if self._parent.flags is None and self._io: self.flags = self._io.load_flags() self._unloaded = False return self._parent.flags @flags.setter def flags(self, value): NDDataArray.flags.fset(self, value) @flags.deleter def flags(self): self.flags = None @property def meta(self): self._unloaded = False return self._meta # This is needed because __init__ assigns to self.meta in astropy 1.2.1, # but should probably be updated in future to wrap (or use a sub-class of) # the MetaData descriptor now used by NDData, once 1.1 support is dropped? @meta.setter def meta(self, value): self._unloaded = False if value is None: value = OrderedDict() if not hasattr(value, 'keys'): raise TypeError('value to assign to meta must be dict-like') self._meta = value @property def ident(self): val = self._meta.get(self._id_key, None) return val or None # PyFITS encodes None as ''; no need to distinguish @ident.setter def ident(self, value): if value != self.ident: self._unloaded = False self._meta[self._id_key] = value @property def unloaded(self): return self._unloaded # Combine subclass-modified argument defaults with those specified by user: def _arith_args(self, kwargs): arith_args = self._arith_defaults.copy() arith_args.update(kwargs) return arith_args def _bitwise_arith(self, operand, operation): """ {name} another dataset (``operand``) to/from/with/by this dataset. Parameters ---------- operand : `~astropy.nddata.NDData` or None The second operand in the operation a {operator} b. This should be None for unary operators. Returns ------- result : `~astropy.nddata.NDData` The resulting dataset. Notes ----- For bit-wise arithmetic (where the use case is generally combining bad pixel masks), any uncertainty & flags are currently dropped. Any WCS is ignored and propagated from ``self``. """ # NDArithmeticMixin._arithmetic() cannot be used here because it # imposes units (for the main data array); the resulting Quantity # converts the array dtype to floating point and NumPy's bitwise # operators don't work on floating point data. Just assume for now # that there is no uncertainty propagation for bitwise ops. 
# Distinguish unary & binary operations: if operand is None: args = (self.data,) else: args = (self.data, operand) # operand can be numpy-like # Do the calculation with NumPy: try: data = operation(*args) except TypeError: raise TypeError('bit-wise operators can only be used on '\ 'integer-type arrays') # Construct an output NDLater instance: result = self.__class__(data, uncertainty=None, mask=None, wcs=deepcopy(self.wcs), flags=None, meta=deepcopy(self.meta), unit=None) return result
    def bitwise_or(self, operand):
        return self._bitwise_arith(operand, np.bitwise_or)

    bitwise_or.__doc__ = \
        _bitwise_arith.__doc__.format(name="OR", operator="|")

    def __or__(self, operand):
        return self.bitwise_or(operand)

    def bitwise_and(self, operand):
        return self._bitwise_arith(operand, np.bitwise_and)

    bitwise_and.__doc__ = \
        _bitwise_arith.__doc__.format(name="AND", operator="&")

    def __and__(self, operand):
        return self.bitwise_and(operand)

    def bitwise_xor(self, operand):
        return self._bitwise_arith(operand, np.bitwise_xor)

    bitwise_xor.__doc__ = \
        _bitwise_arith.__doc__.format(name="XOR", operator="^")

    def __xor__(self, operand):
        return self.bitwise_xor(operand)

    def invert(self):
        """
        Calculate the bit-wise NOT of this dataset.

        Returns
        -------
        result : `~astropy.nddata.NDData`
            The resulting dataset.

        Notes
        -----
        For bit-wise arithmetic (where the use case is generally combining
        bad pixel masks), any uncertainty & flags are currently dropped.
        Any WCS is ignored and propagated from ``self``.
        """
        return self._bitwise_arith(None, np.invert)

    def __invert__(self):
        return self.invert()

    # For these operators that wrap NDArithmeticMixin methods, bad pixel mask
    # propagation isn't going to work properly until we revert to using `mask`
    # instead of `flags`. NB. The corresponding parent class methods could do
    # with a bit more checking of parameter values (they can produce some
    # pretty obscure errors).
    def add(self, operand, **kwargs):
        return self._parent.add(operand, **self._arith_args(kwargs))

    def __add__(self, operand):
        return self.add(operand)

    def subtract(self, operand, **kwargs):
        return self._parent.subtract(operand, **self._arith_args(kwargs))

    def __sub__(self, operand):
        return self.subtract(operand)

    def multiply(self, operand, **kwargs):
        return self._parent.multiply(operand, **self._arith_args(kwargs))

    def __mul__(self, operand):
        return self.multiply(operand)

    def divide(self, operand, **kwargs):
        return self._parent.divide(operand, **self._arith_args(kwargs))

    def __div__(self, operand):
        return self.divide(operand)

    def __truediv__(self, operand):
        return self.divide(operand)
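# Illustrative sketch (not part of the original module): NDLater defers
# loading pixel data until first accessed. The filename is hypothetical; in
# practice instances are usually obtained from a DataFile rather than created
# directly.
#
#   >>> df = DataFile('raw/S20150101S0001.fits')
#   >>> ndd = df[0]        # meta-data are read; pixels are not loaded yet
#   >>> im = ndd.data      # first access loads the array from the mapped file
#   >>> del ndd.data       # drop the reference again (memory freed if possible)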
[docs]def load_file_list(filename): """ Load a text file containing a list of filenames (or other strings) as a Python list. To obtain a list of DataFile objects, the result can easily be converted as in the example: >>> raw_biases = DataFileList(load_file_list('list_of_biases.txt')) If the listed files need downloading first, the usage would be similar to: >>> bias_list = load_file_list('list_of_biases.txt') >>> download_files(bias_list, server='gemini', dirname='raw') >>> raw_biases = DataFileList(bias_list, dirname='raw') (or it may be preferable to produce the initial list by other means, such as command-line arguments or a list definition in the user script). The DataFileList object can subsequently be used in place of the initial plain Python list. Parameters ---------- filename : str Name of a plain-text file, containing one entry per line. Although the intention is mainly to work with filenames, any non-comment strings are valid. Lines whose first non-whitespace character is '#' are treated as comments. Returns ------- list of str A list of filenames (or other strings), one per input line with any leading or trailing whitespace removed. """ f = open(filename, 'r') flist = [] # Append one filename per line to the list: for line in f: line = line.strip() # remove new lines & trailing/leading space if line and line[0] != '#': # ignore empty lines & comments flist.append(line) f.close() return flist
def temp_saved_datafile(datafile):
    """
    Save a copy of a DataFile instance using a temporary filename, eg. for
    use by an external program, and return the copy. It is the caller's
    responsibility to delete the file once it is no longer needed. Although
    mapped to different files, both objects share any pixel data in memory
    until reloaded.
    """
    name = new_filename(base=datafile.filename.base,
                        ext=datafile.filename.dotext, full_path=False)

    tdf = DataFile(data=datafile, filename=name, dirname='', mode='overwrite')

    # Re-create the file as soon as possible after new_filename has released
    # the file handle, by saving the new DataFile object:
    tdf.save()

    # Pass the new temporary DataFile back to the caller (should it
    # automatically delete its own file when it goes out of scope?):
    return tdf
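# Illustrative sketch (not part of the original module): saving a temporary
# copy for use by an external program and cleaning it up afterwards;
# run_external_step is a hypothetical placeholder.
#
#   >>> tdf = temp_saved_datafile(df)
#   >>> run_external_step(str(tdf.filename))
#   >>> os.remove(str(tdf.filename))   # caller is responsible for deletion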
# To do:
#  - Is the DataFile init logic needlessly re-reading any DataFile passed
#    as an argument? More specifically, I think this will be triggered when
#    adding a DataFile to a DataFileList with mode='read'.
#  - Implement deepcopy methods?