Source code for quasarnp.utils

"""A module containing facilities for rebinning data and processing QuasarNP output.

There are three methods in this module, one that will process the output of
QuasarNP into a human parsable form, and two that are used to rebin the DESI
wavelength grid into the QuasarNet wavelength grid. In addition, there is
a global dictionary that maps emission line names to their rest wavelength.
"""

import numpy as np

# Absorption wavelengths
absorber_IGM = {
    'Halpha'      : 6562.8,
    'OIII(5008)'  : 5008.24,
    'OIII(4933)'  : 4932.68,
    'Hbeta'       : 4862.68,
    'MgI(2853)'   : 2852.96,
    'MgII(2804)'  : 2803.5324,
    'MgII(2796)'  : 2796.3511,
    'FeII(2600)'  : 2600.1724835,
    'FeII(2587)'  : 2586.6495659,
    'MnII(2577)'  : 2576.877,
    'FeII(2383)'  : 2382.7641781,
    'FeII(2374)'  : 2374.4603294,
    'FeII(2344)'  : 2344.2129601,
    'CIII(1909)'  : 1908.734,
    'AlIII(1863)' : 1862.79113,
    'AlIII(1855)' : 1854.71829,
    'AlII(1671)'  : 1670.7886,
    'FeII(1608)'  : 1608.4511,
    'CIV(1551)'   : 1550.77845,
    'CIV(eff)'    : 1549.06,
    'CIV(1548)'   : 1548.2049,
    'SiII(1527)'  : 1526.70698,
    'SiIV(1403)'  : 1402.77291,
    'SiIV(1394)'  : 1393.76018,
    'CII(1335)'   : 1334.5323,
    'SiII(1304)'  : 1304.3702,
    'OI(1302)'    : 1302.1685,
    'SiII(1260)'  : 1260.4221,
    'NV(1243)'    : 1242.804,
    'NV(1239)'    : 1238.821,
    'LYA'         : 1215.67,
    'SiIII(1207)' : 1206.500,
    'NI(1200)'    : 1200.,
    'SiII(1193)'  : 1193.2897,
    'SiII(1190)'  : 1190.4158,
    'OI(1039)'    : 1039.230,
    'OVI(1038)'   : 1037.613,
    'OVI(1032)'   : 1031.912,
    'LYB'         : 1025.72,
}


[docs]
class WaveGrid():
    def __init__(self, linear=False, wmin=None, wmax=None, wdelta=None, grid=None):
        """
            Initialize a WaveGrid object, used for defining spectra wavelength grids.

            Parameters
            ----------
            linear : bool, optional
                Whether this wavelength grid is equally spaced in linear wavelength
                or equally spaced in logarithmic wavelength.

            wmin : float, optional
                The lower bound of the wavelength grid. Defaults to None, which uses
                3600 for both the default linear and logarithmic wavelength grids.

            wmax : float, optional
                The upper bound of the wavelength grid. Defaults to None, which uses
                10000 for the default logarithmic grid and 9824 for the default
                linear grid.

            wdelta : float, optional
                The spacing between wavelength bins in the wavelength grid.
                If linear, this spacing is the spacing in linear wavelength.
                If not linear, then this spacing must be in logarithmic wavelength.
                Defaults to None, which uses 1e-3 for the default logarithmic
                wavelength grid, and 0.8 * 17 (13.6) for the default linear
                QuasarNET wavelength grid.

            grid : numpy.ndarray, optional
                Override all other parameters, and use this wavelength grid
                as the wavelength grid. Allows for unusual or inconsistent spacing
                grids, but may break some other functionality. Defaults to None.

        """
        self.is_linear = linear

        # Both grids have the same lower bound.
        if wmin is None:
            wmin = 3600

        if self.is_linear:
            if wmax is None:
                wmax = 9824
            if wdelta is None:
                # 17 seems arbitrary but its the constant needed to get approximately
                # the same number of linear bins as in the logarithmic case
                # (458 vs 443)
                wdelta = 0.8 * 17
        else:
            if wmax is None:
                wmax = 10000
            if wdelta is None:
                wdelta = 1e-3
            wmin = np.log10(wmin)
            wmax = np.log10(wmax)
            nbins = int((wmax - wmin) / wdelta)

        # Sanity checking.
        if wmax < wmin:
            raise ValueError(f"wmin ({wmin}) must be less than wmax ({wmax})!")

        if grid is not None:
            self.wave = grid
            self.wmin = grid[0]
            self.wmax = grid[-1]
            # Assume uniform, but even if not the first grid spacing is a reasonable choice.
            if self.is_linear:
                self.wdelta = grid[1] - grid[0]
            else:
                self.wdelta = np.log10(grid[1]) - np.log10(grid[0])
        else:
            if self.is_linear:
                self.wave = np.round(np.arange(wmin, wmax + 1e-3, wdelta), 1)
            else:
                self.wave = 10**(wmin + np.arange(nbins) * wdelta)
            self.wmin, self.wmax, self.wdelta = wmin, wmax, wdelta

    def __len__(self):
        return len(self.wave)

    def __getitem__(self, key):
        return self.wave[key]




[docs]
def process_preds(preds, lines, lines_bal, verbose=True, wave=WaveGrid(linear=False)):
    """Convert network output to line confidence and redshift predictions.

    Parameters
    ----------
        preds : numpy.ndarray
            Model prediction, output of `model.predict`.
        lines : list of str
            List of line names.
        lines_bal : list of str
            List of BAL line names.
        verbose : bool, optional
            Whether or not to print verbose debug output. Defaults to True.
        wavegrid : WaveGrid, optional
            The wavelength grid used to generate these predictions. Used to
            determine redshift. Defaults to the default logarithmic QuasarNET grid.

    Returns
    -------
        c_line : numpy.ndarray
            Confidence that each line appears in the given spectra, with
            shape `(nlines, nspec)`.
        z_line : numpy.ndarray
            Estimated redshift of the spectra derived from each line, with
            shape `(nlines, nspec)`.
        zbest : numpy.ndarray
            Redshift of the most confident line for each spectra with length
            `nspec`.
        c_line_bal : numpy.ndarray
            Confidence that each BAL line appears in the given spectra, with
            shape `(nlines_bal, nspec)`.
        z_line_bal : numpy.ndarray
            Estimated redshift of the spectra derived from each BAL line, with
            shape `(nlines_bal, nspec)`.

    Notes
    -----
    This method determines the number of trained lines and BAL lines by setting
    `nlines = len(lines)` and `nlines_bal = len(lines_bal)`.
    """
    assert len(lines) + len(lines_bal) == len(preds), "Total number of lines does not match number of predictions!"

    nspec, nboxes = preds[0].shape
    nboxes //= 2
    if verbose:
        print(f"INFO: nspec = {nspec}, nboxes={nboxes}")
    nlines = len(lines)

    # Doing non BAL lines first
    c_line = np.zeros((nlines, nspec))
    z_line = np.zeros_like(c_line) # This ensures they're always the same shape.
    nbins = len(wave)
    i_to_wave = lambda x: np.interp(x, np.arange(nbins), wave.wave)

    for il, line in enumerate(lines):
        l = absorber_IGM[line]

        # j is the box number, offset is how far into the box the line is predicted to be
        j = preds[il][:, :13].argmax(axis=1)
        offset  = preds[il][np.arange(nspec, dtype=int), nboxes + j]

        # Confidence in line, redshift of line
        c_line[il] = preds[il][:, :13].max(axis=1)

        # Take the "index" (sclara index + float offset), map that to
        # a box and how far into that box the line is, convert that
        # box distance to a wavelength. Convert the wavelength to a
        # redshift.
        z_line[il] = (i_to_wave((j + offset) * nbins / nboxes) / l) - 1

    # Not "best redshift", rather "redshift of most confident line"
    zbest = z_line[c_line.argmax(axis=0), np.arange(nspec)]
    zbest = np.array(zbest)

    # Code for BAL boxes is the same as above just run on the BAL lines.
    nlines_bal = len(lines_bal)
    c_line_bal = np.zeros((nlines_bal, nspec))
    z_line_bal = np.zeros_like(c_line_bal)

    for il, line in enumerate(lines_bal):
        l = absorber_IGM[line]

        j = preds[nlines+il][:, :13].argmax(axis=1)
        offset  = preds[il+nlines][np.arange(nspec, dtype=int), nboxes + j]

        c_line_bal[il] = preds[il + nlines][:, :13].max(axis=1)
        z_line_bal[il] = (i_to_wave((j + offset) * nbins / nboxes) / l) - 1

    return c_line, z_line, zbest, c_line_bal, z_line_bal



[docs]
def regrid(old_grid, new_grid=WaveGrid(linear=False)):
    """Generate the mapping from the old wavelength grid to the QuasarNet grid.

    Parameters
    ----------
    old_grid : WaveGrid
        The old wavelength grid.
    new_grid : WaveGrid, optional
        The wavelength grid to rebin the loaded exposure to. Defaults to the
        logarithmic QuasarNET grid.

    Returns
    -------
    bins : numpy.ndarray
        Array of length `len(old_grid)` where each element is the bin number
        in the new grid that the old wavelength bin is assigned to.
    w : numpy.ndarray
        Array of length `len(old_grid)` where each element is True if the old
        wavelength bin is contained within the new grid boundaries and False if
        it is not.
    """
    linear_spacing = np.allclose(np.diff(new_grid.wave)[0], np.diff(new_grid.wave))
    log_spacing = np.allclose(np.diff(np.log10(new_grid.wave))[0], np.diff(np.log10(new_grid.wave)))

    if linear_spacing:
        # Rounding off at the 10th decimal place helps avoid float rounding errors when
        # rebinning the desi grid to qnet grids since the latter should be an integer
        # number of the former bins.
        wdelta = np.diff(new_grid.wave)[0]
        wmin = new_grid.wave[0]
        bins = np.floor(np.round(((old_grid.wave - wmin) / (wdelta)), decimals=10)).astype(int)

    elif log_spacing:
        l_min = np.log10(new_grid.wave)[0]
        dl = np.diff(np.log10(new_grid.wave))[0]
        bins = np.floor((np.log10(old_grid.wave) - l_min) / dl).astype(int)

    else:
        raise ValueError("New grid spacing must be constant in either logarithmic or linear wavelength.")

    w = (bins >= 0) & (bins < len(new_grid))
    return bins, w




[docs]
def rebin(flux, ivar, w_grid, out_grid=WaveGrid(linear=False)):
    """Rebin flux to the QuasarNet wavelength grid.

    The process for rebinning flux is as follows. First, the flux is multiplied
    by the ivar. Then, each bin from the old wavelength grid is assigned to
    a new bin on the new grid. The `flux*ivar` assigned to each new bin
    is summed and stored. The `ivar` assigned to each bin is also summed and
    stored. These rebinned values are then returned.

    Parameters
    ----------
    flux : numpy.ndarray
        Input flux array of shape `(nspec, len(w_grid))`.
    ivar : numpy.ndarray
        Input ivar array of shape `(nspec, len(w_grid))`.
    w_grid: WaveGrid
        Input wavelength grid.
    out_grid : WaveGrid, optional
        The wavelength grid to rebin the loaded exposure to. Defaults to the
        logarithmic QuasarNET grid.

    Returns
    -------
    flux_out : numpy.ndarray
        Input flux rebinned onto the QuasarNet wavelength grid.
    ivar_out : numpy.ndarray
        Input ivar rebinned onto the QuasarNet wavelength grid.

    See Also
    --------
    regrid : Function that converts the old wavelength grid to the new grid.
    """

    bins, w = regrid(w_grid, out_grid)

    fl_iv = flux * ivar

    # len(flux) will give number of spectra,
    # len(new_grid) will give number of output bins
    flux_out = np.zeros((len(flux), len(out_grid)))
    ivar_out = np.zeros_like(flux_out)

    # These lines are necessary for SDSS spectra. For DESI
    # spectra nothing will change here, since the entire DESI grid is contained
    # within the QuasarNET one, but for BOSS/eBOSS the grid can extend out
    # past the QuasarNET grid and give negative bin values. I have tests that
    # confirm this still works on DESI data, don't worry.
    fl_iv = fl_iv[:, w]
    bins = bins[w]
    ivar_temp = ivar[:, w]

    for i in range(len(flux)):
        c = np.bincount(bins, weights=fl_iv[i, :])
        flux_out[i, :len(c)] += c
        c = np.bincount(bins, weights=ivar_temp[i, :])
        ivar_out[i, :len(c)] += c

    return flux_out, ivar_out




[docs]
def renormalize(flux, ivar):
    """Renormalize the flux for processing with QuasarNet.

    The process for renormalizing flux is as follows. First, the weighted mean
    of the flux is calculated, with the ivar as weights. Then the weighted root
    mean squared value of the flux minus the mean is computed, once again using
    the ivar as weights. The renormalized flux is the initial flux minus the
    weighted mean and then divided by the rms value.

    Parameters
    ----------
    flux : numpy.ndarray
        Input flux array of shape `(nspec, len(w_grid))`.
    ivar : numpy.ndarray
        Input ivar array of shape `(nspec, len(w_grid))`.

    Returns
    -------
    flux_out : numpy.ndarray
        Input flux renormalized for use with QuasarNet.
    """

    # axis=1 corresponds to the rebinned spectral axis
    # Finding the weighted mean both for normalization and for the rms
    mean = np.average(flux, axis=1, weights=ivar)[:, None]
    rms = np.sqrt(np.average((flux - mean) ** 2, axis=1, weights=ivar))[:, None]

    # Normalize by subtracting the weighted mean and dividing by the rms
    # as prescribed in the original QuasarNet paper.
    return (flux - mean) / rms
Source code for quasarnp.utils

QuasarNP

Navigation

Related Topics