Source code for quasarnp.utils

"""A module containing facilities for rebinning data and processing QuasarNP output.

There are three methods in this module, one that will process the output of
QuasarNP into a human parsable form, and two that are used to rebin the DESI
wavelength grid into the QuasarNet wavelength grid. In addition, there is
a global dictionary that maps emission line names to their rest wavelength.
"""

import numpy as np

# Absorption wavelengths
absorber_IGM = {
    'Halpha'      : 6562.8,
    'OIII(5008)'  : 5008.24,
    'OIII(4933)'  : 4932.68,
    'Hbeta'       : 4862.68,
    'MgI(2853)'   : 2852.96,
    'MgII(2804)'  : 2803.5324,
    'MgII(2796)'  : 2796.3511,
    'FeII(2600)'  : 2600.1724835,
    'FeII(2587)'  : 2586.6495659,
    'MnII(2577)'  : 2576.877,
    'FeII(2383)'  : 2382.7641781,
    'FeII(2374)'  : 2374.4603294,
    'FeII(2344)'  : 2344.2129601,
    'CIII(1909)'  : 1908.734,
    'AlIII(1863)' : 1862.79113,
    'AlIII(1855)' : 1854.71829,
    'AlII(1671)'  : 1670.7886,
    'FeII(1608)'  : 1608.4511,
    'CIV(1551)'   : 1550.77845,
    'CIV(eff)'    : 1549.06,
    'CIV(1548)'   : 1548.2049,
    'SiII(1527)'  : 1526.70698,
    'SiIV(1403)'  : 1402.77291,
    'SiIV(1394)'  : 1393.76018,
    'CII(1335)'   : 1334.5323,
    'SiII(1304)'  : 1304.3702,
    'OI(1302)'    : 1302.1685,
    'SiII(1260)'  : 1260.4221,
    'NV(1243)'    : 1242.804,
    'NV(1239)'    : 1238.821,
    'LYA'         : 1215.67,
    'SiIII(1207)' : 1206.500,
    'NI(1200)'    : 1200.,
    'SiII(1193)'  : 1193.2897,
    'SiII(1190)'  : 1190.4158,
    'OI(1039)'    : 1039.230,
    'OVI(1038)'   : 1037.613,
    'OVI(1032)'   : 1031.912,
    'LYB'         : 1025.72,
}


# Log SDSS grid information
# TODO: Make these editable and expose them publicly.
# Perhaps in the same way Farr did?
l_min = np.log10(3600.)
l_max = np.log10(10000.)
dl = 1e-3 #* 2
nbins = int((l_max - l_min) / dl)
wave = 10**(l_min + np.arange(nbins) * dl)

# Linear DESI grid information
wmin, wmax, wdelta = 3600, 9824, 0.8
desi_wave = np.round(np.arange(wmin, wmax + wdelta, wdelta), 1)

# 17 seems arbitrary but its the constant needed to get approximately
# the same number of linear bins as in the logarithmic case
# (458 vs 443)
wdelta_qnet = wdelta * 17
linear_wave = np.round(np.arange(wmin, wmax + wdelta, wdelta_qnet), 1)
nbins_linear = len(linear_wave)


[docs] def process_preds(preds, lines, lines_bal, verbose=True, wave=wave): """Convert network output to line confidence and redshift predictions. Parameters ---------- preds : numpy.ndarray Model prediction, output of `model.predict`. lines : list of str List of line names. lines_bal : list of str List of BAL line names. verbose : bool, optional Whether or not to print verbose debug output. Defaults to True. Returns ------- c_line : numpy.ndarray Confidence that each line appears in the given spectra, with shape `(nlines, nspec)`. z_line : numpy.ndarray Estimated redshift of the spectra derived from each line, with shape `(nlines, nspec)`. zbest : numpy.ndarray Redshift of the most confident line for each spectra with length `nspec`. c_line_bal : numpy.ndarray Confidence that each BAL line appears in the given spectra, with shape `(nlines_bal, nspec)`. z_line_bal : numpy.ndarray Estimated redshift of the spectra derived from each BAL line, with shape `(nlines_bal, nspec)`. Notes ----- This method determines the number of trained lines and BAL lines by setting `nlines = len(lines)` and `nlines_bal = len(lines_bal)`. """ assert len(lines) + len(lines_bal) == len(preds), "Total number of lines does not match number of predictions!" nspec, nboxes = preds[0].shape nboxes //= 2 if verbose: print(f"INFO: nspec = {nspec}, nboxes={nboxes}") nlines = len(lines) # Doing non BAL lines first c_line = np.zeros((nlines, nspec)) z_line = np.zeros_like(c_line) # This ensures they're always the same shape. i_to_wave = lambda x: np.interp(x, np.arange(len(wave)), wave) for il, line in enumerate(lines): l = absorber_IGM[line] # j is the box number, offset is how far into the box the line is predicted to be j = preds[il][:, :13].argmax(axis=1) offset = preds[il][np.arange(nspec, dtype=int), nboxes + j] # Confidence in line, redshift of line c_line[il] = preds[il][:, :13].max(axis=1) # Take the "index" (sclara index + float offset), map that to # a box and how far into that box the line is, convert that # box distance to a wavelength. Convert the wavelength to a # redshift. z_line[il] = (i_to_wave((j + offset) * len(wave) / nboxes) / l) - 1 # Not "best redshift", rather "redshift of most confident line" zbest = z_line[c_line.argmax(axis=0), np.arange(nspec)] zbest = np.array(zbest) # Code for BAL boxes is the same as above just run on the BAL lines. nlines_bal = len(lines_bal) c_line_bal = np.zeros((nlines_bal, nspec)) z_line_bal = np.zeros_like(c_line_bal) for il, line in enumerate(lines_bal): l = absorber_IGM[line] j = preds[nlines+il][:, :13].argmax(axis=1) offset = preds[il+nlines][np.arange(nspec, dtype=int), nboxes + j] c_line_bal[il] = preds[il + nlines][:, :13].max(axis=1) z_line_bal[il] = (i_to_wave((j + offset) * len(wave) / nboxes) / l) - 1 return c_line, z_line, zbest, c_line_bal, z_line_bal
[docs] def regrid(old_grid, new_grid=wave): """Generate the mapping from the old wavelength grid to the QuasarNet grid. Parameters ---------- old_grid : numpy.ndarray The old wavelength grid. new_grid : numpy.ndarray, optional The wavelength grid to rebin the loaded exposure to. Defaults to the logarithmic QuasarNET grid. Returns ------- bins : numpy.ndarray Array of length `len(old_grid)` where each element is the bin number in the new grid that the old wavelength bin is assigned to. w : numpy.ndarray Array of length `len(old_grid)` where each element is True if the old wavelength bin is contained within the new grid boundaries and False if it is not. """ linear_spacing = np.allclose(np.diff(new_grid)[0], np.diff(new_grid)) log_spacing = np.allclose(np.diff(np.log10(new_grid))[0], np.diff(np.log10(new_grid))) if linear_spacing: # Rounding off at the 10th decimal place helps avoid float rounding errors when # rebinning the desi grid to qnet grids since the latter should be an integer # number of the former bins. wdelta = np.diff(new_grid)[0] wmin = new_grid[0] bins = np.floor(np.round(((old_grid - wmin) / (wdelta)), decimals=10)).astype(int) elif log_spacing: l_min = np.log10(new_grid)[0] dl = np.diff(np.log10(new_grid))[0] bins = np.floor((np.log10(old_grid) - l_min) / dl).astype(int) else: raise ValueError("New grid spacing must be constant in either logarithmic or linear wavelength.") w = (bins >= 0) & (bins < len(new_grid)) return bins, w
[docs] def rebin(flux, ivar, w_grid, out_grid=wave): """Rebin flux to the QuasarNet wavelength grid. The process for rebinning flux is as follows. First, the flux is multiplied by the ivar. Then, each bin from the old wavelength grid is assigned to a new bin on the new grid. The `flux*ivar` assigned to each new bin is summed and stored. The `ivar` assigned to each bin is also summed and stored. These rebinned values are then returned. Parameters ---------- flux : numpy.ndarray Input flux array of shape `(nspec, len(w_grid))`. ivar : numpy.ndarray Input ivar array of shape `(nspec, len(w_grid))`. w_grid: numpy.ndarray Input wavelength grid. out_grid : numpy.ndarray, optional The wavelength grid to rebin the loaded exposure to. Defaults to the logarithmic QuasarNET grid. Returns ------- flux_out : numpy.ndarray Input flux rebinned onto the QuasarNet wavelength grid. ivar_out : numpy.ndarray Input ivar rebinned onto the QuasarNet wavelength grid. See Also -------- regrid : Function that converts the old wavelength grid to the new grid. """ new_grid, w = regrid(w_grid, out_grid) fl_iv = flux * ivar # len(flux) will give number of spectra, # len(new_grid) will give number of output bins flux_out = np.zeros((len(flux), len(out_grid))) ivar_out = np.zeros_like(flux_out) # These lines are necessary for SDSS spectra. For DESI # spectra nothing will change here, since the entire DESI grid is contained # within the QuasarNET one, but for BOSS/eBOSS the grid can extend out # past the QuasarNET grid and give negative bin values. I have tests that # confirm this still works on DESI data, don't worry. fl_iv = fl_iv[:, w] new_grid = new_grid[w] ivar_temp = ivar[:, w] for i in range(len(flux)): c = np.bincount(new_grid, weights=fl_iv[i, :]) flux_out[i, :len(c)] += c c = np.bincount(new_grid, weights=ivar_temp[i, :]) ivar_out[i, :len(c)] += c return flux_out, ivar_out
[docs] def renormalize(flux, ivar): """Renormalize the flux for processing with QuasarNet. The process for renormalizing flux is as follows. First, the weighted mean of the flux is calculated, with the ivar as weights. Then the weighted root mean squared value of the flux minus the mean is computed, once again using the ivar as weights. The renormalized flux is the initial flux minus the weighted mean and then divided by the rms value. Parameters ---------- flux : numpy.ndarray Input flux array of shape `(nspec, len(w_grid))`. ivar : numpy.ndarray Input ivar array of shape `(nspec, len(w_grid))`. Returns ------- flux_out : numpy.ndarray Input flux renormalized for use with QuasarNet. """ # axis=1 corresponds to the rebinned spectral axis # Finding the weighted mean both for normalization and for the rms mean = np.average(flux, axis=1, weights=ivar)[:, None] rms = np.sqrt(np.average((flux - mean) ** 2, axis=1, weights=ivar))[:, None] # Normalize by subtracting the weighted mean and dividing by the rms # as prescribed in the original QuasarNet paper. return (flux - mean) / rms