Skip to content

Commit 21fb3b6

Browse files
authored
Merge pull request #358 from python-adaptive/as-pandas-dataframe
Add getting learner's data as pandas.DataFrame; add learner.to_dataframe method
2 parents c59405a + e091772 commit 21fb3b6

16 files changed

+1034
-18
lines changed

adaptive/learner/average_learner.py

Lines changed: 89 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,20 @@
88

99
from adaptive.learner.base_learner import BaseLearner
1010
from adaptive.notebook_integration import ensure_holoviews
11-
from adaptive.types import Float, Real
12-
from adaptive.utils import cache_latest
11+
from adaptive.types import Float, Int, Real
12+
from adaptive.utils import (
13+
assign_defaults,
14+
cache_latest,
15+
partial_function_from_dataframe,
16+
)
17+
18+
try:
19+
import pandas
20+
21+
with_pandas = True
22+
23+
except ModuleNotFoundError:
24+
with_pandas = False
1325

1426

1527
class AverageLearner(BaseLearner):
@@ -70,6 +82,80 @@ def to_numpy(self):
7082
"""Data as NumPy array of size (npoints, 2) with seeds and values."""
7183
return np.array(sorted(self.data.items()))
7284

85+
def to_dataframe(
    self,
    with_default_function_args: bool = True,
    function_prefix: str = "function.",
    seed_name: str = "seed",
    y_name: str = "y",
) -> pandas.DataFrame:
    """Export the learner's data as a `pandas.DataFrame`.

    The frame holds one row per entry of ``self.data``, sorted by key,
    with the key in the ``seed_name`` column and the value in the
    ``y_name`` column. The column roles are also recorded in
    ``df.attrs["inputs"]`` and ``df.attrs["output"]``.

    Parameters
    ----------
    with_default_function_args : bool, optional
        Include the ``learner.function``'s default arguments as a
        column, by default True
    function_prefix : str, optional
        Prefix to the ``learner.function``'s default arguments' names,
        by default "function."
    seed_name : str, optional
        Name of the ``seed`` parameter, by default "seed"
    y_name : str, optional
        Name of the output value, by default "y"

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ImportError
        If `pandas` is not installed.
    """
    if not with_pandas:
        raise ImportError("pandas is not installed.")

    rows = sorted(self.data.items())
    frame = pandas.DataFrame(rows, columns=[seed_name, y_name])
    frame.attrs.update({"inputs": [seed_name], "output": y_name})

    if not with_default_function_args:
        return frame
    # Adds the function's default arguments to ``frame`` in place.
    assign_defaults(self.function, frame, function_prefix)
    return frame
124+
125+
def load_dataframe(
    self,
    df: pandas.DataFrame,
    with_default_function_args: bool = True,
    function_prefix: str = "function.",
    seed_name: str = "seed",
    y_name: str = "y",
):
    """Feed the rows of a `pandas.DataFrame` into the learner.

    The ``seed_name`` and ``y_name`` columns are handed to
    ``self.tell_many``. If ``with_default_function_args`` is True, then
    ``learner.function``'s default arguments are set (using
    `functools.partial`) from the values in the `pandas.DataFrame`.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to load.
    with_default_function_args : bool, optional
        The ``with_default_function_args`` used in ``to_dataframe()``,
        by default True
    function_prefix : str, optional
        The ``function_prefix`` used in ``to_dataframe``, by default "function."
    seed_name : str, optional
        The ``seed_name`` used in ``to_dataframe``, by default "seed"
    y_name : str, optional
        The ``y_name`` used in ``to_dataframe``, by default "y"
    """
    seeds = df[seed_name].values
    values = df[y_name].values
    self.tell_many(seeds, values)

    if not with_default_function_args:
        return
    # Rebind the learner's function with the defaults stored in ``df``.
    self.function = partial_function_from_dataframe(
        self.function, df, function_prefix
    )
158+
73159
def ask(self, n: int, tell_pending: bool = True) -> tuple[list[int], list[Float]]:
74160
points = list(range(self.n_requested, self.n_requested + n))
75161

@@ -87,7 +173,7 @@ def ask(self, n: int, tell_pending: bool = True) -> tuple[list[int], list[Float]
87173
self.tell_pending(p)
88174
return points, loss_improvements
89175

90-
def tell(self, n: int, value: Real) -> None:
176+
def tell(self, n: Int, value: Real) -> None:
91177
if n in self.data:
92178
# The point has already been added before.
93179
return

adaptive/learner/average_learner1D.py

Lines changed: 120 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,16 @@
1414

1515
from adaptive.learner.learner1D import Learner1D, _get_intervals
1616
from adaptive.notebook_integration import ensure_holoviews
17-
from adaptive.types import Real
17+
from adaptive.types import Int, Real
18+
from adaptive.utils import assign_defaults, partial_function_from_dataframe
19+
20+
try:
21+
import pandas
22+
23+
with_pandas = True
24+
25+
except ModuleNotFoundError:
26+
with_pandas = False
1827

1928
Point = Tuple[int, Real]
2029
Points = List[Point]
@@ -127,6 +136,112 @@ def min_samples_per_point(self) -> int:
127136
return 0
128137
return min(self._number_samples.values())
129138

139+
def to_numpy(self, mean: bool = False) -> np.ndarray:
    """Return the learner's data as a NumPy array.

    Parameters
    ----------
    mean : bool, optional
        If True, delegate to the parent class's ``to_numpy``. If False
        (the default), return one row per individual sample taken from
        ``self._data_samples``, laid out as ``(seed, x, *y)`` where ``y``
        is flattened with `numpy.atleast_1d`.

    Returns
    -------
    numpy.ndarray
    """
    if mean:
        return super().to_numpy()

    rows = []
    for x, samples in self._data_samples.items():
        for seed, y in samples.items():
            rows.append((seed, x, *np.atleast_1d(y)))
    return np.array(rows)
150+
151+
def to_dataframe(
    self,
    mean: bool = False,
    with_default_function_args: bool = True,
    function_prefix: str = "function.",
    seed_name: str = "seed",
    x_name: str = "x",
    y_name: str = "y",
) -> pandas.DataFrame:
    """Return the data as a `pandas.DataFrame`.

    Parameters
    ----------
    mean : bool, optional
        If True, return one row per ``x`` using the values stored in
        ``self.data`` (presumably the per-point sample means), with
        columns ``[x_name, y_name]``. If False, return one row per
        individual sample from ``self._data_samples`` with columns
        ``[seed_name, x_name, y_name]``. By default False.
    with_default_function_args : bool, optional
        Include the ``learner.function``'s default arguments as a
        column, by default True
    function_prefix : str, optional
        Prefix to the ``learner.function``'s default arguments' names,
        by default "function."
    seed_name : str, optional
        Name of the ``seed`` parameter, by default "seed"
    x_name : str, optional
        Name of the ``x`` parameter, by default "x"
    y_name : str, optional
        Name of the output value, by default "y"

    Returns
    -------
    pandas.DataFrame
        ``df.attrs["inputs"]`` lists the input columns actually present
        in the frame and ``df.attrs["output"]`` holds ``y_name``.

    Raises
    ------
    ImportError
        If `pandas` is not installed.
    """
    if not with_pandas:
        raise ImportError("pandas is not installed.")
    if mean:
        data = sorted(self.data.items())
        columns = [x_name, y_name]
    else:
        data = [
            (seed, x, y)
            for x, seed_y in sorted(self._data_samples.items())
            for seed, y in sorted(seed_y.items())
        ]
        columns = [seed_name, x_name, y_name]
    df = pandas.DataFrame(data, columns=columns)
    # Every column except the trailing output column is an input. Deriving
    # this from ``columns`` avoids advertising a seed column that does not
    # exist when ``mean=True``.
    df.attrs["inputs"] = columns[:-1]
    df.attrs["output"] = y_name
    if with_default_function_args:
        assign_defaults(self.function, df, function_prefix)
    return df
204+
205+
def load_dataframe(
    self,
    df: pandas.DataFrame,
    with_default_function_args: bool = True,
    function_prefix: str = "function.",
    seed_name: str = "seed",
    x_name: str = "x",
    y_name: str = "y",
):
    """Feed the rows of a `pandas.DataFrame` into the learner.

    Each row is passed to ``self.tell_many`` as a ``(seed, x)`` point
    together with its ``y`` value. If ``with_default_function_args`` is
    True, then ``learner.function``'s default arguments are set (using
    `functools.partial`) from the values in the `pandas.DataFrame`.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to load.
    with_default_function_args : bool, optional
        The ``with_default_function_args`` used in ``to_dataframe()``,
        by default True
    function_prefix : str, optional
        The ``function_prefix`` used in ``to_dataframe``, by default "function."
    seed_name : str, optional
        The ``seed_name`` used in ``to_dataframe``, by default "seed"
    x_name : str, optional
        The ``x_name`` used in ``to_dataframe``, by default "x"
    y_name : str, optional
        The ``y_name`` used in ``to_dataframe``, by default "y"
    """
    # We're zipping the two columns instead of taking
    # df[[seed_name, x_name]].values because the latter would turn the
    # seeds into floats.
    seeds = df[seed_name].values.tolist()
    xs = df[x_name].values.tolist()
    points = list(zip(seeds, xs))
    self.tell_many(points, df[y_name].values)

    if not with_default_function_args:
        return
    # Rebind the learner's function with the defaults stored in ``df``.
    self.function = partial_function_from_dataframe(
        self.function, df, function_prefix
    )
244+
130245
def ask(self, n: int, tell_pending: bool = True) -> tuple[Points, list[float]]:
131246
"""Return 'n' points that are expected to maximally reduce the loss."""
132247
# If some point is undersampled, resample it
@@ -362,7 +477,9 @@ def _calc_error_in_mean(self, ys: Iterable[Real], y_avg: Real, n: int) -> float:
362477
t_student = scipy.stats.t.ppf(1 - self.alpha, df=n - 1)
363478
return t_student * (variance_in_mean / n) ** 0.5
364479

365-
def tell_many(self, xs: Points, ys: Sequence[Real]) -> None:
480+
def tell_many(
481+
self, xs: Points | np.ndarray, ys: Sequence[Real] | np.ndarray
482+
) -> None:
366483
# Check that all x are within the bounds
367484
# TODO: remove this requirement, all other learners add the data
368485
# but ignore it going forward.
@@ -373,7 +490,7 @@ def tell_many(self, xs: Points, ys: Sequence[Real]) -> None:
373490
)
374491

375492
# Create a mapping of points to a list of samples
376-
mapping: DefaultDict[Real, DefaultDict[int, Real]] = defaultdict(
493+
mapping: DefaultDict[Real, DefaultDict[Int, Real]] = defaultdict(
377494
lambda: defaultdict(dict)
378495
)
379496
for (seed, x), y in zip(xs, ys):

adaptive/learner/balancing_learner.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
from adaptive.notebook_integration import ensure_holoviews
1212
from adaptive.utils import cache_latest, named_product, restore
1313

14+
try:
15+
import pandas
16+
17+
with_pandas = True
18+
19+
except ModuleNotFoundError:
20+
with_pandas = False
21+
1422

1523
def dispatch(child_functions, arg):
1624
index, x = arg
@@ -381,6 +389,55 @@ def from_product(cls, f, learner_type, learner_kwargs, combos):
381389
learners.append(learner)
382390
return cls(learners, cdims=arguments)
383391

392+
def to_dataframe(self, index_name: str = "learner_index", **kwargs):
    """Concatenate the child learners' data into one `pandas.DataFrame`.

    Each child's frame gets a leading ``index_name`` column holding the
    position of that child in ``self.learners``; the frames are then
    stacked row-wise with a fresh integer index.

    Parameters
    ----------
    index_name : str, optional
        The name of the index column indicating the learner index,
        by default "learner_index".
    **kwargs : dict
        Keyword arguments passed to each ``child_learner.to_dataframe(**kwargs)``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ImportError
        If `pandas` is not installed.
    """
    if not with_pandas:
        raise ImportError("pandas is not installed.")

    frames = []
    for position, child in enumerate(self.learners):
        child_df = child.to_dataframe(**kwargs)
        # Capture the column order before the index column is added so
        # that it can be moved to the front.
        ordered = [index_name] + list(child_df.columns)
        child_df[index_name] = position
        frames.append(child_df[ordered])
    return pandas.concat(frames, axis=0, ignore_index=True)
423+
424+
def load_dataframe(
    self, df: pandas.DataFrame, index_name: str = "learner_index", **kwargs
):
    """Distribute the rows of a `pandas.DataFrame` over the child learners.

    The frame is grouped by the ``index_name`` column and each group is
    loaded into the child learner at that position in ``self.learners``.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame with the data to load.
    index_name : str, optional
        The ``index_name`` used in `to_dataframe`, by default "learner_index".
    **kwargs : dict
        Keyword arguments passed to each ``child_learner.load_dataframe(**kwargs)``.
    """
    children = self.learners
    for learner_idx, group in df.groupby(index_name):
        children[learner_idx].load_dataframe(group, **kwargs)
440+
384441
def save(self, fname, compress=True):
385442
"""Save the data of the child learners into pickle files
386443
in a directory.

0 commit comments

Comments
 (0)