Large Linelists & Chunked Synthesis
When synthesizing spectra from large linelists (thousands of lines spanning hundreds of Angstroms), the full wavelength range may not fit in GPU memory at once, or the single-call computation may be impractically slow. calc_formation_temp_chunked solves this by dividing the wavelength range into fixed-width chunks, computing each independently, and returning the raw per-chunk results for the caller to stitch together.
In-memory workflow
For linelists that fit in RAM after chunking, the simplest approach returns a Vector{FormTempResult}:
using Korg
using FormationTemps; FT = FormationTemps
linelist = Korg.read_linelist(joinpath(FT.datdir, "Sun_VALD.lin"))
star = StellarProps(Teff=5777.0, logg=4.44, Fe_H=0.0,
vsini=2100.0, v_macro=3400.0, v_micro=850.0)
# returns Vector{FormTempResult}, one per chunk
chunks = calc_formation_temp_chunked(star, linelist;
chunk_width=50.0, wing_padding=30.0, overlap=5.0,
Δλ=0.01, convolve=false, Nϕ=32)Each element of chunks is a full FormTempResult with wavs, flux, form_temps, cont_func, and atmosphere fields. Adjacent chunks overlap by overlap Angstroms, so the caller can blend or cut at the midpoint.
Streaming to disk
For very large linelists where even the accumulated Vector{FormTempResult} would exceed available memory, pass a callback function. This streams each chunk to disk as it completes, and calc_formation_temp_chunked returns nothing.
The following example is from scripts/generate_temp_spectrum.jl and writes each chunk to an HDF5 group:
println(">>> Synthesizing chunks...")
let chunk_idx = Ref(0)
h5open(outfile, "w") do h5
# metadata
HDF5.attributes(h5)["chunk_width"] = chunk_width
HDF5.attributes(h5)["wing_padding"] = wing_padding
HDF5.attributes(h5)["overlap"] = overlap
HDF5.attributes(h5)["n_lines"] = length(linelist)
HDF5.attributes(h5)["Teff"] = star_props.Teff
HDF5.attributes(h5)["logg"] = star_props.logg
HDF5.attributes(h5)["Fe_H"] = star_props.Fe_H
HDF5.attributes(h5)["vsini"] = star_props.vsini
HDF5.attributes(h5)["zeta_RT"] = star_props.ζ
HDF5.attributes(h5)["xi"] = star_props.ξ
HDF5.attributes(h5)["rho_star"] = star_props.ρstar
HDF5.attributes(h5)["i_star"] = star_props.istar
HDF5.attributes(h5)["wavelength_frame"] = wav_label
# include atmosphere data in the same output file
g_atm = create_group(h5, "model_atmosphere")
g_atm["zs"] = zs
g_atm["nd"] = nd
g_atm["Ts"] = Ts
g_atm["τs_ref"] = τs_ref
# callback writes each chunk to HDF5 as it completes
function write_chunk(ci, result, ll_chunk)
chunk_idx[] = ci
wavs = collect(result.wavs)
line_centers = [l.wl * 1e8 for l in ll_chunk]
g = create_group(h5, @sprintf("chunk_%04d", ci))
g["line_centers"] = line_centers
g["wavs"] = wavs
g["flux"] = result.flux
g["temp"] = result.form_temps
g["cfunc"] = result.cont_func
end
FT.calc_formation_temp_chunked(star_props, linelist;
chunk_width=chunk_width,
wing_padding=wing_padding,
overlap=overlap,
Δλ=Δλ, buffer=buffer,
convolve=false, Nϕ=Nϕ,
ne_warn_thresh=Inf,
callback=write_chunk)
end
endThe callback signature is (chunk_idx::Int, result::FormTempResult, ll_chunk) -> nothing, where ll_chunk is the padded linelist view used for that chunk.
Chunking parameters
# chunked computation parameters
chunk_width = 50.0 # Å per wavelength chunk
wing_padding = 30.0 # Å beyond chunk edges for linelist selection
overlap = 5.0 # Å overlap between chunks for stitching
Δλ = 0.001
Nϕ = 128
buffer = 3.0The key parameters are:
| Parameter | Default | Description |
|---|---|---|
chunk_width | 50.0 | Width of each wavelength chunk in Angstroms. |
wing_padding | 30.0 | Extra range (Angstroms) beyond each chunk edge for linelist selection. Ensures that broad line wings (e.g., H-alpha, Ca II) contribute correctly at chunk boundaries. |
overlap | 5.0 | Overlap width (Angstroms) between adjacent chunks. Used during post-hoc stitching to blend or cut. |
Δλ | 0.001 | Wavelength step size in Angstroms. |
buffer | 2.0 | Extra wavelength range (Angstroms) beyond the linelist edges. |
Too small a wing_padding causes lines near chunk edges to be computed without their full wings, introducing residuals at stitch boundaries. 30 Angstroms is conservative; 15 Angstroms suffices for most lines but not for very broad resonant lines (e.g., H-alpha).
Stitching chunks
calc_formation_temp_chunked returns raw chunks that need to be stitched together.
The below example from scripts/generate_temp_spectrum.jl linearly crossfades flux, formation temperatures, and contribution functions across the overlap region.
println(">>> Splicing chunks (blend)...")
h5open(outfile, "r") do h5in
chunk_names = sort(filter(name -> startswith(name, "chunk_"), collect(keys(h5in))))
nchunks = length(chunk_names)
if nchunks == 0
error("No chunk groups found in $(outfile).")
end
# read all chunks into memory for blending
raw_wavs = Vector{Vector{Float64}}(undef, nchunks)
raw_flux = Vector{Vector{Float64}}(undef, nchunks)
raw_temp = Vector{Vector{Float64}}(undef, nchunks)
raw_cfunc = Vector{Matrix{Float64}}(undef, nchunks)
raw_lc = Vector{Vector{Float64}}(undef, nchunks)
for (i, gn) in enumerate(chunk_names)
g = h5in[gn]
raw_wavs[i] = vec(read(g["wavs"]))
raw_flux[i] = vec(read(g["flux"]))
raw_temp[i] = vec(read(g["temp"]))
raw_cfunc[i] = read(g["cfunc"])
raw_lc[i] = vec(read(g["line_centers"]))
end
# blend: accumulate starting from chunk 1, linearly crossfade in overlap
N_ov = max(0, round(Int, overlap / Δλ) + 1)
all_wavs = copy(raw_wavs[1])
all_flux = copy(raw_flux[1])
all_temps = copy(raw_temp[1])
all_cfunc = copy(raw_cfunc[1])
all_lc = copy(raw_lc[1])
for i in 2:nchunks
Nλ_new = length(raw_wavs[i])
N_ov_actual = min(N_ov, length(all_wavs), Nλ_new)
# blend the overlap region
for k in 1:N_ov_actual
w_new = Float64(k) / Float64(N_ov_actual + 1)
w_old = 1.0 - w_new
acc_idx = length(all_wavs) - N_ov_actual + k
all_flux[acc_idx] = w_old * all_flux[acc_idx] + w_new * raw_flux[i][k]
all_temps[acc_idx] = w_old * all_temps[acc_idx] + w_new * raw_temp[i][k]
all_cfunc[:, acc_idx] .= w_old .* all_cfunc[:, acc_idx] .+ w_new .* raw_cfunc[i][:, k]
end
# append the non-overlapping tail
if N_ov_actual < Nλ_new
tail = (N_ov_actual + 1):Nλ_new
append!(all_wavs, raw_wavs[i][tail])
append!(all_flux, raw_flux[i][tail])
append!(all_temps, raw_temp[i][tail])
all_cfunc = hcat(all_cfunc, raw_cfunc[i][:, tail])
append!(all_lc, raw_lc[i][filter(j -> raw_lc[i][j] > all_wavs[end - length(tail)], eachindex(raw_lc[i]))])
end
end
all_lc = sort(unique(all_lc))
# trim to actual linelist extent + buffer
wls_Å = [l.wl * 1e8 for l in linelist]
trim_lo = first(wls_Å) - buffer
trim_hi = last(wls_Å) + buffer
keep = (all_wavs .>= trim_lo) .& (all_wavs .<= trim_hi)
all_wavs = all_wavs[keep]
all_flux = all_flux[keep]
all_temps = all_temps[keep]
all_cfunc = all_cfunc[:, keep]
all_lc = all_lc[(all_lc .>= trim_lo) .& (all_lc .<= trim_hi)]
# write blended result as a single group
h5open(outfile_1d, "w") do h5out
HDF5.attributes(h5out)["chunk_width"] = chunk_width
HDF5.attributes(h5out)["n_lines"] = length(linelist)
HDF5.attributes(h5out)["n_chunks"] = nchunks
HDF5.attributes(h5out)["spliced"] = 1
HDF5.attributes(h5out)["stitch_mode"] = "blend"
HDF5.attributes(h5out)["Teff"] = star_props.Teff
HDF5.attributes(h5out)["logg"] = star_props.logg
HDF5.attributes(h5out)["Fe_H"] = star_props.Fe_H
HDF5.attributes(h5out)["vsini"] = star_props.vsini
HDF5.attributes(h5out)["zeta_RT"] = star_props.ζ
HDF5.attributes(h5out)["xi"] = star_props.ξ
HDF5.attributes(h5out)["rho_star"] = star_props.ρstar
HDF5.attributes(h5out)["i_star"] = star_props.istar
HDF5.attributes(h5out)["wavelength_frame"] = wav_label
g_atm = create_group(h5out, "model_atmosphere")
g_atm["zs"] = zs
g_atm["nd"] = nd
g_atm["Ts"] = Ts
g_atm["τs_ref"] = τs_ref
g_out = create_group(h5out, "chunk_0001")
g_out["line_centers"] = all_lc
g_out["wavs"] = all_wavs
g_out["flux"] = all_flux
g_out["temp"] = all_temps
g_out["cfunc"] = all_cfunc
end
endComplete example
The full production script combining all of the above is at scripts/generate_temp_spectrum.jl. It:
- Loads a large VALD linelist and converts to air wavelengths.
- Streams chunked results to HDF5.
- Reads the raw chunks back and stitches them.
- Trims the result to the linelist extent and writes the final spliced output.