Skip to content

Commit 905cb4f

Browse files
committed
Add custom polars Kurucz reader and test
1 parent ad7f382 commit 905cb4f

File tree

15 files changed

+260
-296
lines changed

15 files changed

+260
-296
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ jobs:
7474
runs-on: ubuntu-24.04
7575
env:
7676
UV_FROZEN: 1
77+
ARTISATOMIC_TESTMODE: 1
7778
steps:
7879
- name: Checkout Code
7980
uses: actions/checkout@v4
@@ -108,11 +109,12 @@ jobs:
108109
tests:
109110
strategy:
110111
matrix:
111-
testname: [cmfgen, floers25, jplt, qub]
112+
testname: [cmfgen, floers25, jplt, kurucz, qub]
112113
fail-fast: false
113114
runs-on: ubuntu-24.04
114115
env:
115116
UV_FROZEN: 1
117+
ARTISATOMIC_TESTMODE: 1
116118
timeout-minutes: 45
117119
name: test ${{ matrix.testname }}
118120

@@ -162,11 +164,6 @@ jobs:
162164
working-directory: atomic-data-floers25/
163165
run: tar -xJvf testdata.tar.xz
164166

165-
- name: Extract QUB test data
166-
if: matrix.testname == 'qub'
167-
working-directory: atomic-data-qub/
168-
run: rsync -av co_tyndall_test_sample/ co_tyndall/
169-
170167
- name: Generate artis atomic data files
171168
run: |
172169
cp tests/${{ matrix.testname }}/artisatomicionhandlers.json .

artisatomic/readkuruczdata.py

Lines changed: 227 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,106 +1,259 @@
1-
from collections import defaultdict
2-
from collections import namedtuple
1+
import os
2+
import re
33
from pathlib import Path
44

5-
import artisatomic
5+
import numpy as np
6+
import polars as pl
67

7-
hc_in_ev_cm = 0.0001239841984332003
8+
import artisatomic
89

910
kuruczdatapath = Path(__file__).parent.absolute() / ".." / "atomic-data-kurucz"
11+
if os.environ.get("ARTISATOMIC_TESTMODE") == "1":
12+
kuruczdatapath = kuruczdatapath / "test_sample"
1013

1114

12-
def find_gfall(atomic_number: int, ion_charge: int) -> Path:
13-
path_gfall = (kuruczdatapath / "extendedatoms" / f"gf{atomic_number:02d}{ion_charge:02d}.lines").resolve()
14-
15-
if not path_gfall.is_file():
16-
path_gfall = (kuruczdatapath / "extendedatoms" / f"gf{atomic_number:02d}{ion_charge:02d}z.lines").resolve()
17-
18-
if not path_gfall.is_file():
19-
path_gfall = (kuruczdatapath / "zztar" / f"gf{atomic_number:02d}{ion_charge:02d}.all").resolve()
20-
21-
if not path_gfall.is_file():
22-
raise FileNotFoundError(f"No Kurucz file for Z={atomic_number} ion_charge {ion_charge}")
23-
24-
return path_gfall
25-
26-
27-
def get_levelname(row) -> str:
28-
return f"{row.label},enpercm={row.energy},j={row.j}"
29-
30-
31-
def read_levels_data(dflevels):
32-
energy_level_tuple = namedtuple("energylevel", "levelname energyabovegsinpercm g parity")
33-
34-
energy_levels = []
15+
def parse_gfall(fname: str) -> pl.LazyFrame:
16+
# Code derived from the GFALL reader of carsus
17+
# https://github.com/tardis-sn/carsus/blob/master/carsus/io/kurucz/gfall.py
18+
gfall_fortran_format = (
19+
"F11.4,F7.3,F6.2,F12.3,F5.2,1X,A10,F12.3,F5.2,1X,"
20+
"A10,F6.2,F6.2,F6.2,A4,I2,I2,I3,F6.3,I3,F6.3,I5,I5,"
21+
"1X,I1,A1,1X,I1,A1,I1,A3,I5,I5,I6"
22+
)
3523

36-
for index, row in dflevels.iterrows():
37-
parity = -index # give a unique parity so that all transitions are permitted
38-
energyabovegsinpercm = float(row.energy)
39-
g = 2 * row.j + 1
40-
newlevel = energy_level_tuple(
41-
levelname=get_levelname(row), parity=parity, g=g, energyabovegsinpercm=energyabovegsinpercm
24+
gfall_columns = [
25+
"wavelength_nm",
26+
"loggf",
27+
"z_dot_ioncharge",
28+
"energyabovegsinpercm_first",
29+
"j_first",
30+
"blank1",
31+
"label_first",
32+
"energyabovegsinpercm_second",
33+
"j_second",
34+
"blank2",
35+
"label_second",
36+
"log_gamma_rad",
37+
"log_gamma_stark",
38+
"log_gamma_vderwaals",
39+
"ref",
40+
"nlte_level_no_first",
41+
"nlte_level_no_second",
42+
"isotope",
43+
"log_f_hyperfine",
44+
"isotope2",
45+
"log_iso_abundance",
46+
"hyper_shift_first",
47+
"hyper_shift_second",
48+
"blank3",
49+
"hyperfine_f_first",
50+
"hyperfine_note_first",
51+
"blank4",
52+
"hyperfine_f_second",
53+
"hyperfine_note_second",
54+
"line_strength_class",
55+
"line_code",
56+
"lande_g_first",
57+
"lande_g_second",
58+
"isotopic_shift",
59+
]
60+
number_match = re.compile(r"\d+(\.\d+)?")
61+
type_match = re.compile(r"[FIXA]")
62+
type_dict = {"F": np.float64, "I": np.int64, "X": str, "A": str}
63+
field_types = tuple(type_dict[item] for item in number_match.sub("", gfall_fortran_format).split(","))
64+
65+
field_widths = list(map(int, re.sub(r"\.\d+", "", type_match.sub("", gfall_fortran_format)).split(",")))
66+
67+
import pandas as pd
68+
69+
gfall = (
70+
pl.from_pandas(
71+
pd.read_fwf(
72+
fname,
73+
widths=field_widths,
74+
skip_blank_lines=True,
75+
names=gfall_columns,
76+
dtypes=dict(zip(gfall_columns, field_types, strict=True)),
77+
compression="infer",
78+
dtype_backend="pyarrow",
79+
)
4280
)
43-
energy_levels.append(newlevel)
44-
45-
energy_levels.sort(key=lambda x: x.energyabovegsinpercm)
46-
47-
return [None, *energy_levels]
48-
49-
50-
def read_lines_data(energy_levels, dflines):
51-
transitions = []
52-
transition_count_of_level_name = defaultdict(int)
53-
transitiontuple = namedtuple("transition", "lowerlevel upperlevel A coll_str")
81+
.lazy()
82+
.drop_nulls(["z_dot_ioncharge", "energyabovegsinpercm_first", "energyabovegsinpercm_second"])
83+
)
84+
double_columns = [col.replace("_first", "") for col in gfall.collect_schema().names() if col.endswith("first")]
5485

55-
for (lowerindex, upperindex), row in dflines.iterrows():
56-
lowerlevel = lowerindex + 1
57-
upperlevel = upperindex + 1
86+
# due to the fact that energy is stored in 1/cm
87+
gfall = gfall.with_columns(
88+
order_lower_upper=pl.col("energyabovegsinpercm_first").abs() < pl.col("energyabovegsinpercm_second").abs()
89+
)
90+
gfall = gfall.with_columns(
91+
pl.when(pl.col("order_lower_upper"))
92+
.then(f"{column}_first")
93+
.otherwise(f"{column}_second")
94+
.alias(f"{column}_lower")
95+
for column in double_columns
96+
).with_columns(
97+
pl.when(pl.col("order_lower_upper"))
98+
.then(f"{column}_second")
99+
.otherwise(f"{column}_first")
100+
.alias(f"{column}_upper")
101+
for column in double_columns
102+
)
58103

59-
transtuple = transitiontuple(lowerlevel=lowerlevel, upperlevel=upperlevel, A=row.A, coll_str=-1)
104+
# Clean labels
105+
ignored_labels = ["AVERAGE", "ENERGIES", "CONTINUUM"]
106+
gfall = gfall.with_columns(
107+
pl.col("label_lower").str.strip_chars().replace(r"\s+", " "),
108+
pl.col("label_upper").str.strip_chars().replace(r"\s+", " "),
109+
).filter(
110+
(pl.col("label_lower").is_in(ignored_labels).not_()) & (pl.col("label_upper").is_in(ignored_labels).not_())
111+
)
60112

61-
# print(line)
62-
transition_count_of_level_name[energy_levels[lowerlevel].levelname] += 1
63-
transition_count_of_level_name[energy_levels[upperlevel].levelname] += 1
113+
gfall = gfall.with_columns(
114+
energyabovegsinpercm_lower_predicted=pl.col("energyabovegsinpercm_lower") < 0,
115+
energyabovegsinpercm_lower=pl.col("energyabovegsinpercm_lower").abs(),
116+
energyabovegsinpercm_upper_predicted=pl.col("energyabovegsinpercm_upper") < 0,
117+
energyabovegsinpercm_upper=pl.col("energyabovegsinpercm_upper").abs(),
118+
)
64119

65-
transitions.append(transtuple)
120+
gfall = gfall.with_columns(atomic_number=pl.col("z_dot_ioncharge").cast(pl.Int64)).with_columns(
121+
ion_charge=((pl.col("z_dot_ioncharge") - pl.col("atomic_number")) * 100).round().cast(pl.Int64),
122+
)
123+
if gfall.select(pl.n_unique("z_dot_ioncharge")).collect().item() != 1:
124+
raise ValueError(f"Expected exactly one unique ion in file {fname}, but found multiple")
66125

67-
return transitions, transition_count_of_level_name
126+
return gfall
68127

69128

70-
def read_levels_and_transitions(atomic_number, ion_stage, flog):
129+
def find_gfall(atomic_number: int, ion_charge: int) -> Path:
130+
extended_atoms_filenames = [
131+
f"gf{atomic_number:02d}{ion_charge:02d}.lines.zst",
132+
f"gf{atomic_number:02d}{ion_charge:02d}.lines",
133+
f"gf{atomic_number:02d}{ion_charge:02d}z.lines.zst",
134+
f"gf{atomic_number:02d}{ion_charge:02d}z.lines",
135+
]
136+
for filename in extended_atoms_filenames:
137+
path_gfall = (kuruczdatapath / "extendedatoms" / filename).resolve()
138+
if path_gfall.is_file():
139+
return path_gfall
140+
zztar_filenames = [f"gf{atomic_number:02d}{ion_charge:02d}.all", f"gf{atomic_number:02d}{ion_charge:02d}.all.zst"]
141+
for filename in zztar_filenames:
142+
path_gfall = (kuruczdatapath / "zztar" / filename).resolve()
143+
if path_gfall.is_file():
144+
return path_gfall
145+
146+
raise FileNotFoundError(f"No Kurucz file for Z={atomic_number} ion_charge {ion_charge}.")
147+
148+
149+
def read_levels_and_transitions(
150+
atomic_number: int, ion_stage: int, flog
151+
) -> tuple[float, pl.DataFrame, pl.DataFrame, dict[str, int]]:
71152
ion_charge = ion_stage - 1
72153

73154
artisatomic.log_and_print(flog, f"Using Kurucz for Z={atomic_number} ion_stage {ion_stage}")
74155

75-
from carsus.io.kurucz import GFALLReader # ty:ignore[unresolved-import]
76-
77-
# path_gfall = (Path(__file__).parent.absolute() / ".." / "atomic-data-kurucz" / "gfall.dat").resolve()
78156
path_gfall = find_gfall(atomic_number, ion_charge)
79157
artisatomic.log_and_print(flog, f"Reading {path_gfall}")
80-
gfall_reader = GFALLReader(
81-
ions=f"{artisatomic.elsymbols[atomic_number]} {ion_charge}",
82-
fname=str(path_gfall),
83-
unique_level_identifier=["energy", "j"],
84-
)
85-
86-
dflevels = gfall_reader.extract_levels().loc[atomic_number, ion_charge]
87-
88-
energy_levels = read_levels_data(dflevels)
89-
90-
artisatomic.log_and_print(flog, f"Read {len(energy_levels[1:]):d} levels")
91-
92-
dflines = gfall_reader.extract_lines().loc[atomic_number, ion_charge]
93158

94-
# wavelengths are in nanometers, so multiply by 10 to get Angstroms
95-
dflines["A"] = dflines["gf"] / (1.49919e-16 * (2 * dflines["j_upper"] + 1) * (dflines["wavelength"] * 10.0) ** 2)
159+
gfall = parse_gfall(fname=str(path_gfall))
160+
column_renames = {
161+
"energyabovegsinpercm_{0}": "energyabovegsinpercm",
162+
"j_{0}": "j",
163+
"label_{0}": "label",
164+
"energyabovegsinpercm_{0}_predicted": "theoretical",
165+
}
166+
167+
e_lower_levels = gfall.rename({key.format("lower"): value for key, value in column_renames.items()})
168+
e_upper_levels = gfall.rename({key.format("upper"): value for key, value in column_renames.items()})
169+
170+
selected_columns = ["atomic_number", "ion_charge", "energyabovegsinpercm", "j", "label", "theoretical"]
171+
dflevels = (
172+
pl.concat([e_lower_levels.select(selected_columns), e_upper_levels.select(selected_columns)])
173+
.unique(["energyabovegsinpercm", "j"], keep="first")
174+
.sort(["energyabovegsinpercm", "j", "label"])
175+
.with_row_index("levelid")
176+
.select(
177+
pl.col("energyabovegsinpercm"),
178+
pl.col("j"),
179+
levelname=(
180+
pl.col("label")
181+
+ ",enpercm="
182+
+ pl.col("energyabovegsinpercm").cast(pl.Utf8)
183+
+ ",j="
184+
+ pl.col("j").cast(pl.String)
185+
),
186+
g=2 * pl.col("j") + 1,
187+
)
188+
.sort("energyabovegsinpercm", "j")
189+
.collect()
190+
)
191+
dflevels = (
192+
artisatomic.add_dummy_zero_level(dflevels)
193+
.with_row_index("levelid")
194+
.with_columns(pl.col("levelid").cast(pl.Int64))
195+
.with_columns(parity=-pl.col("levelid")) # give a unique parity so that all transitions are permitted
196+
)
197+
artisatomic.log_and_print(flog, f"Read {len(dflevels) - 1:d} levels")
198+
199+
transitions = (
200+
gfall.select(
201+
[
202+
"atomic_number",
203+
"ion_charge",
204+
"energyabovegsinpercm_lower",
205+
"j_lower",
206+
"energyabovegsinpercm_upper",
207+
"j_upper",
208+
"wavelength_nm",
209+
"loggf",
210+
]
211+
)
212+
.with_columns(gf=10 ** pl.col("loggf"))
213+
.drop("loggf")
214+
.join(
215+
dflevels.lazy().select(
216+
energyabovegsinpercm_lower=pl.col("energyabovegsinpercm"),
217+
j_lower=pl.col("j"),
218+
levelid_lower=pl.col("levelid"),
219+
),
220+
on=["energyabovegsinpercm_lower", "j_lower"],
221+
how="left",
222+
)
223+
.join(
224+
dflevels.lazy().select(
225+
energyabovegsinpercm_upper=pl.col("energyabovegsinpercm"),
226+
j_upper=pl.col("j"),
227+
levelid_upper=pl.col("levelid"),
228+
),
229+
on=["energyabovegsinpercm_upper", "j_upper"],
230+
how="left",
231+
)
232+
.with_columns(
233+
# wavelengths are in nanometers, so multiply by 10 to get Angstroms
234+
A=pl.col("gf") / (1.49919e-16 * (2 * pl.col("j_upper") + 1) * (pl.col("wavelength_nm") * 10.0).pow(2))
235+
)
236+
.select(
237+
upperlevel=pl.col("levelid_upper"),
238+
lowerlevel=pl.col("levelid_lower"),
239+
A=pl.col("A"),
240+
coll_str=pl.lit(-1),
241+
)
242+
.collect()
243+
)
96244

97-
transitions, transition_count_of_level_name = read_lines_data(energy_levels, dflines)
245+
transition_count_of_level_name = {
246+
levelname: (
247+
transitions.select(((pl.col("lowerlevel") == levelid) | (pl.col("upperlevel") == levelid)).sum()).item()
248+
)
249+
for levelid, levelname in dflevels.select("levelid", "levelname").iter_rows(named=False)
250+
}
98251
artisatomic.log_and_print(flog, f"Read {len(transitions):d} transitions")
99252

100253
ionization_energy_in_ev = artisatomic.get_nist_ionization_energies_ev()[(atomic_number, ion_stage)]
101254
artisatomic.log_and_print(flog, f"ionization energy: {ionization_energy_in_ev} eV")
102255

103-
return ionization_energy_in_ev, energy_levels, transitions, transition_count_of_level_name
256+
return ionization_energy_in_ev, dflevels, transitions, transition_count_of_level_name
104257

105258

106259
def get_level_valence_n(levelname: str):

artisatomic/readqubdata.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/usr/bin/env python3
2+
import os
23
import typing as t
34
from collections import defaultdict
45
from collections import namedtuple
@@ -11,7 +12,12 @@
1112

1213
import artisatomic
1314

14-
tyndall_co3_path = (Path(__file__).parent.resolve() / ".." / "atomic-data-qub" / "co_tyndall").resolve()
15+
tyndall_co3_path = (
16+
Path(__file__).parent.resolve()
17+
/ ".."
18+
/ "atomic-data-qub"
19+
/ ("co_tyndall_test_sample" if os.environ.get("ARTISATOMIC_TESTMODE") == "1" else "co_tyndall")
20+
).resolve()
1521

1622
ryd_to_ev = 13.605693122994232
1723

atomic-data-kurucz/.gitignore

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
gfall*
2-
*.lines
3-
*.all
4-
creationdates.dat
1+
extendedatoms/*.lines*
2+
zztar/*.all*
3+
zztar.tar
73.7 KB
Binary file not shown.
1.11 KB
Binary file not shown.
1.65 KB
Binary file not shown.
4.38 KB
Binary file not shown.
2.36 KB
Binary file not shown.

0 commit comments

Comments
 (0)