Source code for ndmapper.lib.cosmetics

# Copyright(c) 2015-2016 Association of Universities for Research in Astronomy, Inc.
# by James E.H. Turner.

import os.path
from copy import copy, deepcopy

import numpy as np
from astropy.modeling import models, fitting

from ndmapper import config, ndprocess_defaults
from ndmapper.data import FileName, DataFile, DataFileList, NDLater
from ndmapper.utils import convert_region, to_datafilelist
from .fitting import fit_1D

__all__ = ['init_bpm', 'add_bpm', 'lacosmic_spec', 'clean_cosmic_rays']


@ndprocess_defaults
[docs]def init_bpm(reference, regions, convention='numpy', value=None, filename=None, reprocess=None): """ Initialize a bad pixel mask array from a string listing the corresponding regions in NumPy or IRAF/FITS syntax. Parameters ---------- reference : DataFile DataFile whose NDLater instances the bad pixel mask should match (in number and array dimensions). regions : list of list of str A list containing one sub-list of bad regions per NDLater instance of the reference DataFile. Each bad region is specified as a string in NumPy/IRAF syntax, eg. '100:110,50:60', '100,*' or ':,99'. The indices can be specified in either NumPy or FITS ordering (see below). convention : str Indexing convention for the region strings: 'NumPy' (default) or 'FITS' (case insensitive). The former uses 0-based indexing by (..., plane, row, column), similar to C or matrix notation, exclusive of the ending row/column, while the latter uses 1-based indexing by (column, row, plane, ...), similar to ForTran or IRAF and inclusive of the ending row/column (with the same origin in either case). value : int, optional Mask value for bad regions (default 1). filename : str or FileName, optional Filename for output DataFile. """ # Use default bad pixel value of 1: if value is None: value = 1 # Re-map the flags/data-quality array labels to the main science array for # the special purpose of keeping a master bad pixel mask (NDData doesn't # allow for .flags without associated .data). This method could do with # tidying up a bit, as simply giving labels=config['labels']['flags'] to # DataFile without also resetting labels['flags'] causes duplication, due # to the condition for including an array in the lists in DataFile.save(): labels = config['labels'].copy() labels['data'] = labels['flags'] labels['flags'] = None # Create (or read from disk, if not reprocessing) the output DataFile. # This is provisional, until a more definitive method is implemented, # possibly as part of the decorator. mode = 'new' if reprocess is None else \ 'update' if reprocess is False and os.path.exists(str(filename)) \ else 'overwrite' output = DataFile(filename, mode=mode, labels=labels) if mode == 'update': return output # Create an empty integer array of the same shape as each reference # NDData instance: for ndd in reference: # Do we want meta=ndd.meta.copy()? Probably not. output.append(NDLater(np.zeros(ndd.data.shape, dtype=np.uint16))) # Convert each region string from NumPy or FITS region string format to # a NumPy slice object and use it to set the specified flag value for the # corresponding pixels: for ndd, reglist in zip(output, regions): for region in reglist: slices = convert_region(region, convention) ndd.data[slices] = value return output
@ndprocess_defaults
[docs]def add_bpm(inputs, bpm, out_names=None, reprocess=None): """ Incorporate an existing bad pixel mask into the corresponding flags attributes of the input DataFile instances. Parameters ---------- inputs : `DataFileList` or `DataFile` Input images to which the bad pixel mask is to be added (via logical OR with any existing ``flags`` information). bpm : `DataFileList` or `DataFile` Bad pixel mask, as produced by `init_bpm`, with integer data quality flags stored in the main data array of each constituent NDLater instance. The length (number of NDLater instances) and shape of each constituent array must match the ``inputs``. out_names : convertible to `str`, optional Output filenames. If None (default), the names of the DataFile instances returned will be constructed from those of the input files, prefixed with 'k'. Returns ------- DataFileList Copies of the ``inputs``, with the bad pixel mask values included in the flags attribute of each constituent `NDLater` instance. Package 'config' options ------------------------ reprocess : bool or None Re-generate and overwrite any existing output files on disk or skip processing and re-use existing results, where available? The default of None instead raises an exception where outputs already exist (requiring the user to delete them explicitly). The processing is always performed for outputs that aren't already available. """ # Some of this bookeeping should be replaced by a standard wrapper. # Also, logging needs to be implemented, as for IRAF. inputs = to_datafilelist(inputs) if out_names is None: out_names = [FileName(df, prefix='k', dirname='') for df in inputs] mode = 'new' if reprocess is None else 'overwrite' outlist = DataFileList(mode=mode) # Here the wrapper would ensure that input/output lengths are the same. len_bpm = len(bpm) # Loop over the files: for in_df, out_name in zip(inputs, out_names): if len(in_df) != len_bpm: raise ValueError('input {0} and BPM have different lengths'\ .format(str(in_df))) # Read and use any existing file if not reprocessing: if reprocess is False and os.path.exists(str(out_name)): out_df = DataFile(out_name, mode='update') # Otherwise, OR each NDLater instance of the BPM with the corresponding # flags extension of the input file: else: # Make a copy the inputs, re-using the data by reference since # replacing the flags attribute won't affect the inputs. out_df = DataFile(out_name, data=in_df, mode=mode) # Set flags to the logical OR of the BPM & any existing flags: for out_ndd, bpm_ndd in zip(out_df, bpm): out_ndd.flags = copy(bpm_ndd.data) if out_ndd.flags is None \ else bpm_ndd.data | out_ndd.flags outlist.append(out_df) return outlist
[docs]def lacosmic_spec(input_ndd, x_order=None, y_order=None, sigclip=4.5, sigfrac=0.32, objlim=1.0, niter=5, sepmed=True, cleantype='meanmask'): """ Detect and clean cosmic rays in a 2D wavelength-dispersed image, using the well-known LA Cosmic algorithm of van Dokkum (2001)*, as implemented in McCully's optimized version for Python, "lacosmicx"+. * LA Cosmic: http://www.astro.yale.edu/dokkum/lacosmic + lacosmicx: https://github.com/cmccully/lacosmicx lacosmicx is an optional dependency, whose absence will cause this function to fail with an ImportError. For the time being, the slightly modified fork at https://github.com/jehturner/lacosmicx must be used. Currently, input and output bad pixel masks are expected to be found in the NDDataArray `flags` attribute, but this will likely change to `mask` in future (throughout NDMapper). Parameters ---------- input_ndd : NDDataArray-like Input image in which cosmic rays are to be detected. x_order, y_order : int or None, optional Order for fitting and subtracting object continuum and sky line models, prior to running the main cosmic ray detection algorithm. When None, defaults are used, according to the image size (as in the IRAF task gemcrspec). When 0, no fit is done. sigclip : float, optional Laplacian-to-noise limit for cosmic ray detection. Lower values will flag more pixels as cosmic rays. Default: 4.5. sigfrac : float, optional Fractional detection limit for neighboring pixels. For cosmic ray neighbor pixels, a lapacian-to-noise detection limit of sigfrac * sigclip will be used. Default: 0.32. objlim : float, optional Minimum contrast between Laplacian image and the fine structure image. Increase this value if cores of bright stars are flagged as cosmic rays. Default: 1.0. niter : int, optional Number of iterations of the LA Cosmic algorithm to perform. Default: 5. sepmed : boolean, optional Use the separable median filter instead of the full median filter. The separable median is not identical to the full median filter, but they are approximately the same and the separable median filter is significantly faster and still detects cosmic rays well. Default: True cleantype : {'median', 'medmask', 'meanmask', 'idw'}, optional Set which clean algorithm is used: 'median': An umasked 5x5 median filter 'medmask': A masked 5x5 median filter 'meanmask': A masked 5x5 mean filter 'idw': A masked 5x5 inverse distance weighted interpolation Default: "meanmask". Returns ------- NDLater A cleaned copy of the input, with cosmic ray detections added to its `flags` array (with a value of 8). """ # This can fail if the optional dependency is missing: from lacosmicx import lacosmicx # For this function to be general-purpose, it should use a meta-data # abstraction for the gain, read noise & saturation, based on a # configuration database of recognized instruments, but until that's # implemented they are required to be called GAIN and RNOISE in meta-data # and the assumed saturation level is fixed at 65k. gain = input_ndd.meta['GAIN'] read_noise = input_ndd.meta['RDNOISE'] saturation = 65535. # Convert DQ to a boolean array of pixels for lacosmicx to treat as bad # from the beginning: bitmask = 65535 # do we want to include everything? if input_ndd.flags is None: inmask = None else: inmask = (input_ndd.flags & bitmask) > 0 # Use default orders from gemcrspec (from Bryan): ny, nx = input_ndd.shape if x_order is None: x_order = 9 if y_order is None: y_order = 2 if ny < 50 else 3 if ny < 80 else 5 # To do: use dispaxis below, rather than -1. # Fit and subtract the object spectrum. For some reason, subtracting the # model directly from the NDData instance here resets .flags to None. if x_order > 0: objfit = fit_1D(input_ndd.data, function='legendre', order=x_order, axis=1, lsigma=4.0, hsigma=4.0, iterations=3) input_ndd.data -= objfit else: objfit = np.zeros_like(input_ndd.data) # Fit sky lines: if y_order > 0: skyfit = fit_1D(input_ndd.data, function='legendre', order=y_order, axis=0, lsigma=4.0, hsigma=4.0, iterations=3) input_ndd.data -= skyfit objfit += skyfit # keep combined fits for later restoration del skyfit # Delegate all the actual identification and cleaning to lacosmicx (a # version to which I've added a bgsub parameter that allows for a # previously-subtracted spectroscopic object+sky model to be included in # the noise estimates, as in the original): cr_mask, clean_data = lacosmicx( input_ndd.data, inmask=inmask, bgsub=objfit, sigclip=sigclip, sigfrac=sigfrac, objlim=objlim, gain=gain, readnoise=read_noise, satlevel=saturation, pssl=0.0, niter=niter, sepmed=sepmed, cleantype=cleantype, fsmode='median', verbose=True ) # Add object & sky signal back in, after cleaning what structure is left: clean_data += objfit # Obey config options for whether to propagate uncertainty & flags: if input_ndd.uncertainty is not None and config['use_uncert']: uncertainty = deepcopy(input_ndd.uncertainty) else: uncertainty = None if input_ndd.flags is not None and config['use_flags']: # To do: abstract the DQ convention somewhere instead of using "8": flags = input_ndd.flags | (np.array(cr_mask, dtype=np.uint16) * 8) else: flags = None # Construct an NDData-like object from the lacosmicx output, plus the # original variance and bad pixel mask, and return it, return NDLater(data=clean_data, uncertainty=uncertainty, flags=flags, meta=input_ndd.meta)
@ndprocess_defaults
[docs]def clean_cosmic_rays(inputs, out_names=None, x_order=None, y_order=None, sigma=4.5, sigfrac=0.32, objlim=1.0, iterations=5, reprocess=None): """ Identify pixels contaminated by cosmic ray flux, using McCully's version of the LACosmic algorithm (see `lacosmic_spec`), record the detections in a copy of the input `flags` array and replace bad values with an estimate of the clean value, based on a masked mean of surrounding pixels. This works on 2D images (with fitting disabled) and spectra. Parameters ---------- inputs : DataFileList or DataFile 2D bias-subtracted input images to be cleaned. out_names : `str`-like or list of `str`-like, optional Names of cleaned output images. If None (default), the names of the DataFile instances returned will be constructed from those of the corresponding input files, prefixed with 'x', as in Gemini IRAF. x_order, y_order : int or None, optional Order for fitting and subtracting object continuum and sky line models, prior to running the main cosmic ray detection algorithm. When None, defaults are used, according to the image size (as in the IRAF task gemcrspec). When 0, no fit is done. sigma : float, optional Laplacian-to-noise limit for cosmic ray detection. Lower values will flag more pixels as cosmic rays. Default: 4.5. sigfrac : float, optional Fractional detection limit for neighboring pixels. For cosmic ray neighbour pixels, a lapacian-to-noise detection limit of sigfrac * sigclip will be used. Default: 0.32. objlim : float, optional Minimum contrast between Laplacian image and the fine structure image. Increase this value if cores of bright stars are flagged as cosmic rays. Default: 1.0. iterations : int, optional Number of iterations of the detection algorithm to perform. Default: 5. Returns ------- DataFileList A copy of the input data with cosmic ray detections included in its `flags` arrays and the corresponding `data` values replaced by local masked mean values. Package 'config' options ------------------------ reprocess : bool or None Re-generate and overwrite any existing output files on disk or skip processing and re-use existing results, where available? The default of None instead raises an exception where outputs already exist (requiring the user to delete them explicitly). The processing is always performed for outputs that aren't already available. """ # Default prefix if output filenames unspecified: prefix = 'x' inputs = to_datafilelist(inputs) if not out_names: out_names = [FileName(indf, prefix=prefix) for indf in inputs] elif len(out_names) != len(inputs): raise ValueError('inputs & out_names have unmatched lengths') mode = 'new' if reprocess is None else 'overwrite' outlist = DataFileList(mode=mode) # For now, GAIN & RDNOISE are hard-wired into lacosmic_spec, but should # be abstracted at some point. We don't use gemvars in Python and the # config options to use uncertainty & flags are picked up directly by # lacosmic_spec(). # Loop over the files: for in_df, out_name in zip(inputs, out_names): # Read and use any existing file if not reprocessing: # To do: move this sort of logic into shared infrastructure. if reprocess is False and os.path.exists(str(out_name)): out_df = DataFile(out_name, mode='update') # Otherwise do/repeat the cosmic ray detection: else: # Make a new copy of the input file (copied by reference, as the # actual NDData instances get overwritten below and higher-level # attributes get deep-copied anyway -- though not any MDF tables): out_df = DataFile(out_name, data=in_df, mode=mode) # Run lacosmic_spec() on each NDData instance of the current file: for n, in_ndd in enumerate(in_df): out_df[n] = lacosmic_spec(in_ndd, x_order=x_order, y_order=y_order, sigclip=sigma, sigfrac=sigfrac, objlim=objlim, niter=iterations, sepmed=True, cleantype='meanmask') outlist.append(out_df) return outlist