|
1 | | -from collections import defaultdict |
2 | | -from collections import namedtuple |
| 1 | +import os |
| 2 | +import re |
3 | 3 | from pathlib import Path |
4 | 4 |
|
5 | | -import artisatomic |
| 5 | +import numpy as np |
| 6 | +import polars as pl |
6 | 7 |
|
7 | | -hc_in_ev_cm = 0.0001239841984332003 |
| 8 | +import artisatomic |
8 | 9 |
|
9 | 10 | kuruczdatapath = Path(__file__).parent.absolute() / ".." / "atomic-data-kurucz" |
| 11 | +if os.environ.get("ARTISATOMIC_TESTMODE") == "1": |
| 12 | + kuruczdatapath = kuruczdatapath / "test_sample" |
10 | 13 |
|
11 | 14 |
|
12 | | -def find_gfall(atomic_number: int, ion_charge: int) -> Path: |
13 | | - path_gfall = (kuruczdatapath / "extendedatoms" / f"gf{atomic_number:02d}{ion_charge:02d}.lines").resolve() |
14 | | - |
15 | | - if not path_gfall.is_file(): |
16 | | - path_gfall = (kuruczdatapath / "extendedatoms" / f"gf{atomic_number:02d}{ion_charge:02d}z.lines").resolve() |
17 | | - |
18 | | - if not path_gfall.is_file(): |
19 | | - path_gfall = (kuruczdatapath / "zztar" / f"gf{atomic_number:02d}{ion_charge:02d}.all").resolve() |
20 | | - |
21 | | - if not path_gfall.is_file(): |
22 | | - raise FileNotFoundError(f"No Kurucz file for Z={atomic_number} ion_charge {ion_charge}") |
23 | | - |
24 | | - return path_gfall |
25 | | - |
26 | | - |
27 | | -def get_levelname(row) -> str: |
28 | | - return f"{row.label},enpercm={row.energy},j={row.j}" |
29 | | - |
30 | | - |
31 | | -def read_levels_data(dflevels): |
32 | | - energy_level_tuple = namedtuple("energylevel", "levelname energyabovegsinpercm g parity") |
33 | | - |
34 | | - energy_levels = [] |
| 15 | +def parse_gfall(fname: str) -> pl.LazyFrame: |
| 16 | + # Code derived from the GFALL reader of carsus |
| 17 | + # https://github.com/tardis-sn/carsus/blob/master/carsus/io/kurucz/gfall.py |
| 18 | + gfall_fortran_format = ( |
| 19 | + "F11.4,F7.3,F6.2,F12.3,F5.2,1X,A10,F12.3,F5.2,1X," |
| 20 | + "A10,F6.2,F6.2,F6.2,A4,I2,I2,I3,F6.3,I3,F6.3,I5,I5," |
| 21 | + "1X,I1,A1,1X,I1,A1,I1,A3,I5,I5,I6" |
| 22 | + ) |
35 | 23 |
|
36 | | - for index, row in dflevels.iterrows(): |
37 | | - parity = -index # give a unique parity so that all transitions are permitted |
38 | | - energyabovegsinpercm = float(row.energy) |
39 | | - g = 2 * row.j + 1 |
40 | | - newlevel = energy_level_tuple( |
41 | | - levelname=get_levelname(row), parity=parity, g=g, energyabovegsinpercm=energyabovegsinpercm |
| 24 | + gfall_columns = [ |
| 25 | + "wavelength_nm", |
| 26 | + "loggf", |
| 27 | + "z_dot_ioncharge", |
| 28 | + "energyabovegsinpercm_first", |
| 29 | + "j_first", |
| 30 | + "blank1", |
| 31 | + "label_first", |
| 32 | + "energyabovegsinpercm_second", |
| 33 | + "j_second", |
| 34 | + "blank2", |
| 35 | + "label_second", |
| 36 | + "log_gamma_rad", |
| 37 | + "log_gamma_stark", |
| 38 | + "log_gamma_vderwaals", |
| 39 | + "ref", |
| 40 | + "nlte_level_no_first", |
| 41 | + "nlte_level_no_second", |
| 42 | + "isotope", |
| 43 | + "log_f_hyperfine", |
| 44 | + "isotope2", |
| 45 | + "log_iso_abundance", |
| 46 | + "hyper_shift_first", |
| 47 | + "hyper_shift_second", |
| 48 | + "blank3", |
| 49 | + "hyperfine_f_first", |
| 50 | + "hyperfine_note_first", |
| 51 | + "blank4", |
| 52 | + "hyperfine_f_second", |
| 53 | + "hyperfine_note_second", |
| 54 | + "line_strength_class", |
| 55 | + "line_code", |
| 56 | + "lande_g_first", |
| 57 | + "lande_g_second", |
| 58 | + "isotopic_shift", |
| 59 | + ] |
| 60 | + number_match = re.compile(r"\d+(\.\d+)?") |
| 61 | + type_match = re.compile(r"[FIXA]") |
| 62 | + type_dict = {"F": np.float64, "I": np.int64, "X": str, "A": str} |
| 63 | + field_types = tuple(type_dict[item] for item in number_match.sub("", gfall_fortran_format).split(",")) |
| 64 | + |
| 65 | + field_widths = list(map(int, re.sub(r"\.\d+", "", type_match.sub("", gfall_fortran_format)).split(","))) |
| 66 | + |
| 67 | + import pandas as pd |
| 68 | + |
| 69 | + gfall = ( |
| 70 | + pl.from_pandas( |
| 71 | + pd.read_fwf( |
| 72 | + fname, |
| 73 | + widths=field_widths, |
| 74 | + skip_blank_lines=True, |
| 75 | + names=gfall_columns, |
| 76 | + dtypes=dict(zip(gfall_columns, field_types, strict=True)), |
| 77 | + compression="infer", |
| 78 | + dtype_backend="pyarrow", |
| 79 | + ) |
42 | 80 | ) |
43 | | - energy_levels.append(newlevel) |
44 | | - |
45 | | - energy_levels.sort(key=lambda x: x.energyabovegsinpercm) |
46 | | - |
47 | | - return [None, *energy_levels] |
48 | | - |
49 | | - |
50 | | -def read_lines_data(energy_levels, dflines): |
51 | | - transitions = [] |
52 | | - transition_count_of_level_name = defaultdict(int) |
53 | | - transitiontuple = namedtuple("transition", "lowerlevel upperlevel A coll_str") |
| 81 | + .lazy() |
| 82 | + .drop_nulls(["z_dot_ioncharge", "energyabovegsinpercm_first", "energyabovegsinpercm_second"]) |
| 83 | + ) |
| 84 | + double_columns = [col.replace("_first", "") for col in gfall.collect_schema().names() if col.endswith("first")] |
54 | 85 |
|
55 | | - for (lowerindex, upperindex), row in dflines.iterrows(): |
56 | | - lowerlevel = lowerindex + 1 |
57 | | - upperlevel = upperindex + 1 |
| 86 | + # due to the fact that energy is stored in 1/cm |
| 87 | + gfall = gfall.with_columns( |
| 88 | + order_lower_upper=pl.col("energyabovegsinpercm_first").abs() < pl.col("energyabovegsinpercm_second").abs() |
| 89 | + ) |
| 90 | + gfall = gfall.with_columns( |
| 91 | + pl.when(pl.col("order_lower_upper")) |
| 92 | + .then(f"{column}_first") |
| 93 | + .otherwise(f"{column}_second") |
| 94 | + .alias(f"{column}_lower") |
| 95 | + for column in double_columns |
| 96 | + ).with_columns( |
| 97 | + pl.when(pl.col("order_lower_upper")) |
| 98 | + .then(f"{column}_second") |
| 99 | + .otherwise(f"{column}_first") |
| 100 | + .alias(f"{column}_upper") |
| 101 | + for column in double_columns |
| 102 | + ) |
58 | 103 |
|
59 | | - transtuple = transitiontuple(lowerlevel=lowerlevel, upperlevel=upperlevel, A=row.A, coll_str=-1) |
| 104 | + # Clean labels |
| 105 | + ignored_labels = ["AVERAGE", "ENERGIES", "CONTINUUM"] |
| 106 | + gfall = gfall.with_columns( |
| 107 | + pl.col("label_lower").str.strip_chars().replace(r"\s+", " "), |
| 108 | + pl.col("label_upper").str.strip_chars().replace(r"\s+", " "), |
| 109 | + ).filter( |
| 110 | + (pl.col("label_lower").is_in(ignored_labels).not_()) & (pl.col("label_upper").is_in(ignored_labels).not_()) |
| 111 | + ) |
60 | 112 |
|
61 | | - # print(line) |
62 | | - transition_count_of_level_name[energy_levels[lowerlevel].levelname] += 1 |
63 | | - transition_count_of_level_name[energy_levels[upperlevel].levelname] += 1 |
| 113 | + gfall = gfall.with_columns( |
| 114 | + energyabovegsinpercm_lower_predicted=pl.col("energyabovegsinpercm_lower") < 0, |
| 115 | + energyabovegsinpercm_lower=pl.col("energyabovegsinpercm_lower").abs(), |
| 116 | + energyabovegsinpercm_upper_predicted=pl.col("energyabovegsinpercm_upper") < 0, |
| 117 | + energyabovegsinpercm_upper=pl.col("energyabovegsinpercm_upper").abs(), |
| 118 | + ) |
64 | 119 |
|
65 | | - transitions.append(transtuple) |
| 120 | + gfall = gfall.with_columns(atomic_number=pl.col("z_dot_ioncharge").cast(pl.Int64)).with_columns( |
| 121 | + ion_charge=((pl.col("z_dot_ioncharge") - pl.col("atomic_number")) * 100).round().cast(pl.Int64), |
| 122 | + ) |
| 123 | + if gfall.select(pl.n_unique("z_dot_ioncharge")).collect().item() != 1: |
| 124 | + raise ValueError(f"Expected exactly one unique ion in file {fname}, but found multiple") |
66 | 125 |
|
67 | | - return transitions, transition_count_of_level_name |
| 126 | + return gfall |
68 | 127 |
|
69 | 128 |
|
70 | | -def read_levels_and_transitions(atomic_number, ion_stage, flog): |
| 129 | +def find_gfall(atomic_number: int, ion_charge: int) -> Path: |
| 130 | + extended_atoms_filenames = [ |
| 131 | + f"gf{atomic_number:02d}{ion_charge:02d}.lines.zst", |
| 132 | + f"gf{atomic_number:02d}{ion_charge:02d}.lines", |
| 133 | + f"gf{atomic_number:02d}{ion_charge:02d}z.lines.zst", |
| 134 | + f"gf{atomic_number:02d}{ion_charge:02d}z.lines", |
| 135 | + ] |
| 136 | + for filename in extended_atoms_filenames: |
| 137 | + path_gfall = (kuruczdatapath / "extendedatoms" / filename).resolve() |
| 138 | + if path_gfall.is_file(): |
| 139 | + return path_gfall |
| 140 | + zztar_filenames = [f"gf{atomic_number:02d}{ion_charge:02d}.all", f"gf{atomic_number:02d}{ion_charge:02d}.all.zst"] |
| 141 | + for filename in zztar_filenames: |
| 142 | + path_gfall = (kuruczdatapath / "zztar" / filename).resolve() |
| 143 | + if path_gfall.is_file(): |
| 144 | + return path_gfall |
| 145 | + |
| 146 | + raise FileNotFoundError(f"No Kurucz file for Z={atomic_number} ion_charge {ion_charge}.") |
| 147 | + |
| 148 | + |
| 149 | +def read_levels_and_transitions( |
| 150 | + atomic_number: int, ion_stage: int, flog |
| 151 | +) -> tuple[float, pl.DataFrame, pl.DataFrame, dict[str, int]]: |
71 | 152 | ion_charge = ion_stage - 1 |
72 | 153 |
|
73 | 154 | artisatomic.log_and_print(flog, f"Using Kurucz for Z={atomic_number} ion_stage {ion_stage}") |
74 | 155 |
|
75 | | - from carsus.io.kurucz import GFALLReader # ty:ignore[unresolved-import] |
76 | | - |
77 | | - # path_gfall = (Path(__file__).parent.absolute() / ".." / "atomic-data-kurucz" / "gfall.dat").resolve() |
78 | 156 | path_gfall = find_gfall(atomic_number, ion_charge) |
79 | 157 | artisatomic.log_and_print(flog, f"Reading {path_gfall}") |
80 | | - gfall_reader = GFALLReader( |
81 | | - ions=f"{artisatomic.elsymbols[atomic_number]} {ion_charge}", |
82 | | - fname=str(path_gfall), |
83 | | - unique_level_identifier=["energy", "j"], |
84 | | - ) |
85 | | - |
86 | | - dflevels = gfall_reader.extract_levels().loc[atomic_number, ion_charge] |
87 | | - |
88 | | - energy_levels = read_levels_data(dflevels) |
89 | | - |
90 | | - artisatomic.log_and_print(flog, f"Read {len(energy_levels[1:]):d} levels") |
91 | | - |
92 | | - dflines = gfall_reader.extract_lines().loc[atomic_number, ion_charge] |
93 | 158 |
|
94 | | - # wavelengths are in nanometers, so multiply by 10 to get Angstroms |
95 | | - dflines["A"] = dflines["gf"] / (1.49919e-16 * (2 * dflines["j_upper"] + 1) * (dflines["wavelength"] * 10.0) ** 2) |
| 159 | + gfall = parse_gfall(fname=str(path_gfall)) |
| 160 | + column_renames = { |
| 161 | + "energyabovegsinpercm_{0}": "energyabovegsinpercm", |
| 162 | + "j_{0}": "j", |
| 163 | + "label_{0}": "label", |
| 164 | + "energyabovegsinpercm_{0}_predicted": "theoretical", |
| 165 | + } |
| 166 | + |
| 167 | + e_lower_levels = gfall.rename({key.format("lower"): value for key, value in column_renames.items()}) |
| 168 | + e_upper_levels = gfall.rename({key.format("upper"): value for key, value in column_renames.items()}) |
| 169 | + |
| 170 | + selected_columns = ["atomic_number", "ion_charge", "energyabovegsinpercm", "j", "label", "theoretical"] |
| 171 | + dflevels = ( |
| 172 | + pl.concat([e_lower_levels.select(selected_columns), e_upper_levels.select(selected_columns)]) |
| 173 | + .unique(["energyabovegsinpercm", "j"], keep="first") |
| 174 | + .sort(["energyabovegsinpercm", "j", "label"]) |
| 175 | + .with_row_index("levelid") |
| 176 | + .select( |
| 177 | + pl.col("energyabovegsinpercm"), |
| 178 | + pl.col("j"), |
| 179 | + levelname=( |
| 180 | + pl.col("label") |
| 181 | + + ",enpercm=" |
| 182 | + + pl.col("energyabovegsinpercm").cast(pl.Utf8) |
| 183 | + + ",j=" |
| 184 | + + pl.col("j").cast(pl.String) |
| 185 | + ), |
| 186 | + g=2 * pl.col("j") + 1, |
| 187 | + ) |
| 188 | + .sort("energyabovegsinpercm", "j") |
| 189 | + .collect() |
| 190 | + ) |
| 191 | + dflevels = ( |
| 192 | + artisatomic.add_dummy_zero_level(dflevels) |
| 193 | + .with_row_index("levelid") |
| 194 | + .with_columns(pl.col("levelid").cast(pl.Int64)) |
| 195 | + .with_columns(parity=-pl.col("levelid")) # give a unique parity so that all transitions are permitted |
| 196 | + ) |
| 197 | + artisatomic.log_and_print(flog, f"Read {len(dflevels) - 1:d} levels") |
| 198 | + |
| 199 | + transitions = ( |
| 200 | + gfall.select( |
| 201 | + [ |
| 202 | + "atomic_number", |
| 203 | + "ion_charge", |
| 204 | + "energyabovegsinpercm_lower", |
| 205 | + "j_lower", |
| 206 | + "energyabovegsinpercm_upper", |
| 207 | + "j_upper", |
| 208 | + "wavelength_nm", |
| 209 | + "loggf", |
| 210 | + ] |
| 211 | + ) |
| 212 | + .with_columns(gf=10 ** pl.col("loggf")) |
| 213 | + .drop("loggf") |
| 214 | + .join( |
| 215 | + dflevels.lazy().select( |
| 216 | + energyabovegsinpercm_lower=pl.col("energyabovegsinpercm"), |
| 217 | + j_lower=pl.col("j"), |
| 218 | + levelid_lower=pl.col("levelid"), |
| 219 | + ), |
| 220 | + on=["energyabovegsinpercm_lower", "j_lower"], |
| 221 | + how="left", |
| 222 | + ) |
| 223 | + .join( |
| 224 | + dflevels.lazy().select( |
| 225 | + energyabovegsinpercm_upper=pl.col("energyabovegsinpercm"), |
| 226 | + j_upper=pl.col("j"), |
| 227 | + levelid_upper=pl.col("levelid"), |
| 228 | + ), |
| 229 | + on=["energyabovegsinpercm_upper", "j_upper"], |
| 230 | + how="left", |
| 231 | + ) |
| 232 | + .with_columns( |
| 233 | + # wavelengths are in nanometers, so multiply by 10 to get Angstroms |
| 234 | + A=pl.col("gf") / (1.49919e-16 * (2 * pl.col("j_upper") + 1) * (pl.col("wavelength_nm") * 10.0).pow(2)) |
| 235 | + ) |
| 236 | + .select( |
| 237 | + upperlevel=pl.col("levelid_upper"), |
| 238 | + lowerlevel=pl.col("levelid_lower"), |
| 239 | + A=pl.col("A"), |
| 240 | + coll_str=pl.lit(-1), |
| 241 | + ) |
| 242 | + .collect() |
| 243 | + ) |
96 | 244 |
|
97 | | - transitions, transition_count_of_level_name = read_lines_data(energy_levels, dflines) |
| 245 | + transition_count_of_level_name = { |
| 246 | + levelname: ( |
| 247 | + transitions.select(((pl.col("lowerlevel") == levelid) | (pl.col("upperlevel") == levelid)).sum()).item() |
| 248 | + ) |
| 249 | + for levelid, levelname in dflevels.select("levelid", "levelname").iter_rows(named=False) |
| 250 | + } |
98 | 251 | artisatomic.log_and_print(flog, f"Read {len(transitions):d} transitions") |
99 | 252 |
|
100 | 253 | ionization_energy_in_ev = artisatomic.get_nist_ionization_energies_ev()[(atomic_number, ion_stage)] |
101 | 254 | artisatomic.log_and_print(flog, f"ionization energy: {ionization_energy_in_ev} eV") |
102 | 255 |
|
103 | | - return ionization_energy_in_ev, energy_levels, transitions, transition_count_of_level_name |
| 256 | + return ionization_energy_in_ev, dflevels, transitions, transition_count_of_level_name |
104 | 257 |
|
105 | 258 |
|
106 | 259 | def get_level_valence_n(levelname: str): |
|
0 commit comments