bessagroup · mpvanderschelling · Nov 2, 2023 · Nov 2, 2023 · Nov 2, 2023 · Nov 2, 2023
diff --git a/docs/source/rst_doc_files/classes/design/experimentsample.rst b/docs/source/rst_doc_files/classes/design/experimentsample.rst
@@ -123,13 +123,7 @@ A reference (:code:`Path`) will be saved to the :attr:`~f3dasm.design.Experiment
    ├── my_experiment_output.csv
    └── my_experiment_jobs.pkl
 
-In the :attr:`~f3dasm.design.ExperimentData.output_data`, a reference to the stored object (e.g. :code:`my_project/output_1/0.npy`) will be automatically appended to the `path_<output parameter name>` parameter.
-
-.. code-block:: python
-
-    >>> experiment_sample['output_numpy']
-    'my_project/output_numpy/0.npy'
-
+In the :attr:`~f3dasm.design.ExperimentData.output_data`, a reference to the stored object (e.g. :code:`my_project/output_1/0.npy`) is automatically stored under the corresponding output parameter.
 
 
 :mod:`f3dasm` has built-in storing functions for numpy :class:`~numpy.ndarray`, pandas :class:`~pandas.DataFrame` and xarray :class:`~xarray.DataArray` and :class:`~xarray.Dataset`. 

diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py
@@ -24,7 +24,7 @@
 # Local
 from .parameter import (CategoricalParameter, CategoricalType,
                         ConstantParameter, ContinuousParameter,
-                        DiscreteParameter, Parameter)
+                        DiscreteParameter, OutputParameter, Parameter)
 
 #                                                          Authorship & Credits
 # =============================================================================
@@ -36,18 +36,6 @@
 # =============================================================================
 
 
-class _Columns:
-    names: List[str]
-
-
-class _Data:
-    data: pd.DataFrame
-    columns: _Columns
-
-    def to_dataframe() -> pd.DataFrame:
-        ...
-
-
 @dataclass
 class Domain:
     """Main class for defining the domain of the design of experiments.
@@ -59,6 +47,7 @@ class Domain:
     """
 
     space: Dict[str, Parameter] = field(default_factory=dict)
+    output_space: Dict[str, OutputParameter] = field(default_factory=dict)
 
     def __len__(self) -> int:
         """The len() method returns the number of parameters"""
@@ -164,44 +153,51 @@ def from_yaml(cls: Type[Domain], yaml: DictConfig) -> Domain:
              for name, param in yaml.items()})
 
     @classmethod
-    def from_dataframe(cls, df: pd.DataFrame) -> Domain:
+    def from_dataframe(cls, df_input: pd.DataFrame,
+                       df_output: pd.DataFrame) -> Domain:
         """Initializes a Domain from a pandas DataFrame.
 
         Parameters
         ----------
-        df : pd.DataFrame
+        df_input : pd.DataFrame
             DataFrame containing the input parameters.
+        df_output : pd.DataFrame
+            DataFrame containing the output parameters.
 
         Returns
         -------
         Domain
             Domain object
         """
-        space = {}
-        for name, type in df.dtypes.items():
+        input_space = {}
+        for name, type in df_input.dtypes.items():
             if type == 'float64':
-                if float(df[name].min()) == float(df[name].max()):
-                    space[name] = ConstantParameter(
-                        value=float(df[name].min()))
+                if float(df_input[name].min()) == float(df_input[name].max()):
+                    input_space[name] = ConstantParameter(
+                        value=float(df_input[name].min()))
                     continue
 
-                space[name] = ContinuousParameter(lower_bound=float(
-                    df[name].min()), upper_bound=float(df[name].max()))
+                input_space[name] = ContinuousParameter(lower_bound=float(
+                    df_input[name].min()),
+                    upper_bound=float(df_input[name].max()))
             elif type == 'int64':
-                if int(df[name].min()) == int(df[name].max()):
-                    space[name] = ConstantParameter(value=int(df[name].min()))
+                if int(df_input[name].min()) == int(df_input[name].max()):
+                    input_space[name] = ConstantParameter(
+                        value=int(df_input[name].min()))
                     continue
 
-                space[name] = DiscreteParameter(lower_bound=int(
-                    df[name].min()), upper_bound=int(df[name].max()))
+                input_space[name] = DiscreteParameter(lower_bound=int(
+                    df_input[name].min()),
+                    upper_bound=int(df_input[name].max()))
             else:
-                space[name] = CategoricalParameter(df[name].unique().tolist())
+                input_space[name] = CategoricalParameter(
+                    df_input[name].unique().tolist())
 
-        return cls(space=space)
+        output_space = {}
+        for name in df_output.columns:
+            output_space[name] = OutputParameter(to_disk=False)
 
-    @classmethod
-    def from_data(cls: Type[Domain], data: _Data) -> Domain:
-        return cls.from_dataframe(data.to_dataframe())
+        return cls(space=input_space, output_space=output_space)
 
 #                                                                        Export
 # =============================================================================
@@ -369,6 +365,29 @@ def add(self, name: str, space: Parameter):
         """
         self.space[name] = space
 
+    def add_output(self, name: str, to_disk: bool):
+        """Add a new output parameter to the domain.
+
+        Parameters
+        ----------
+        name : str
+            Name of the output parameter.
+        to_disk : bool
+            Whether to store the output parameter on disk.
+
+        Raises
+        ------
+        KeyError
+            If an output parameter called ``name`` is already registered.
+
+        Example
+        -------
+        >>> domain = Domain()
+        >>> domain.add_output('param1', True)
+        >>> domain.output_space
+        {'param1': OutputParameter(to_disk=True)}
+        """
+        if name in self.output_space:
+            # Adjacent literals are joined at compile time. A backslash
+            # continuation inside the string would embed the indentation of
+            # the following source line in the message.
+            raise KeyError(
+                f"Parameter {name} already exists in the domain! "
+                "Choose a different name.")
+
+        self.output_space[name] = OutputParameter(to_disk=to_disk)
 #                                                                       Getters
 # =============================================================================
 
@@ -649,6 +668,35 @@ def _all_input_continuous(self) -> bool:
         """Check if all input parameters are continuous"""
         return len(self) == len(self._filter(ContinuousParameter))
 
+    def check_output(self, names: List[str]):
+        """Register every name that is not yet an output parameter.
+
+        Parameters
+        ----------
+        names : List[str]
+            Output names to look up; each one that is missing from the
+            output space is added with ``to_disk=False``.
+        """
+        for candidate in names:
+            if self.is_in_output(candidate):
+                continue
+            self.add_output(candidate, to_disk=False)
+
+    def is_in_output(self, output_name: str) -> bool:
+        """Check if an output parameter with this name is in the domain
+
+        Parameters
+        ----------
+        output_name : str
+            Name of the output
+
+        Returns
+        -------
+        bool
+            True if output is in the domain, False otherwise
+
+        Example
+        -------
+        >>> domain = Domain()
+        >>> domain.add_output('output1', to_disk=False)
+        >>> domain.is_in_output('output1')
+        True
+        >>> domain.is_in_output('output2')
+        False
+        """
+        return output_name in self.output_space
+
 
 def make_nd_continuous_domain(bounds: np.ndarray | List[List[float]],
                               dimensionality: int) -> Domain:

diff --git a/src/f3dasm/_src/design/parameter.py b/src/f3dasm/_src/design/parameter.py
@@ -34,6 +34,11 @@ class Parameter:
     _type: ClassVar[str] = field(init=False, default="object")
 
 
+@dataclass
+class OutputParameter(Parameter):
+    """Parameter describing an output of the experiment.
+
+    Attributes
+    ----------
+    to_disk : bool
+        Whether to store the output parameter on disk, by default False.
+    """
+    to_disk: bool = False
+
+
 @dataclass
 class ConstantParameter(Parameter):
     """Create a search space parameter that is constant.

diff --git a/src/f3dasm/_src/experimentdata/_columns.py b/src/f3dasm/_src/experimentdata/_columns.py
@@ -0,0 +1,110 @@
+"""
+The _Columns class is used to order and track the parameter names of the data
+columns. This class is not intended to be used directly by the user.
+It is used by the _Data class to provide an interface to datatypes that do not
+have a column structure, such as numpy arrays.
+
+Notes
+-----
+
+For the default back-end of _Data, this class is redundant since pandas
+DataFrames already have a column structure. However, this class is intended
+to be a uniform interface to data that does not have a column structure.
+"""
+
+#                                                                       Modules
+# =============================================================================
+
+from __future__ import annotations
+
+# Standard
+from typing import Dict, List, Optional
+
+#                                                          Authorship & Credits
+# =============================================================================
+__author__ = 'Martin van der Schelling ([email protected])'
+__credits__ = ['Martin van der Schelling']
+__status__ = 'Stable'
+# =============================================================================
+#
+# =============================================================================
+
+
+class _Columns:
+    """Ordered collection of unique column names.
+
+    The names are stored as the keys of a dict whose values are all None,
+    so they stay unique and keep their insertion order.
+    """
+
+    def __init__(self, columns: Optional[Dict[str, None]] = None):
+        """Class that keeps track of the names and order of parameters
+        in the raw data.
+
+        Parameters
+        ----------
+        columns: Dict[str, None], optional
+            dictionary with the column names as keys and None as values,
+            by default None (which starts with no columns)
+
+        Notes
+        -----
+        A dict with placeholder values is used as an ordered set: its keys
+        cannot contain duplicates and they keep their insertion order.
+        The dict that is passed in is stored as-is, it is not copied.
+        """
+        if columns is None:
+            columns = {}
+
+        self.columns: Dict[str, None] = columns
+
+    def __repr__(self) -> str:
+        """Representation of the _Columns object."""
+        return repr(self.columns.keys())
+
+    @property
+    def names(self) -> List[str]:
+        """List of the names of the columns.
+
+        Returns
+        -------
+        List[str]
+            list of the names of the columns, in column order
+        """
+        return list(self.columns)
+
+    def add(self, name: str):
+        """Add a column to the _Columns object.
+
+        Parameters
+        ----------
+        name: str
+            name of the column to add; a name that is already present
+            keeps its current position
+        """
+        self.columns[name] = None
+
+    def iloc(self, name: str | List[str]) -> List[int]:
+        """Get the index of a column.
+
+        Parameters
+        ----------
+        name: str | List[str]
+            name of the column(s) to get the index of
+
+        Returns
+        -------
+        List[int]
+            list of the indices of the columns, in the order requested
+
+        Raises
+        ------
+        ValueError
+            if one of the requested names is not a column
+        """
+        if isinstance(name, str):
+            name = [name]
+
+        # Materialise the ordered names once instead of once per lookup.
+        ordered_names = self.names
+        return [ordered_names.index(n) for n in name]
+
+    def rename(self, old_name: str, new_name: str):
+        """Replace the name of a column, keeping its position.
+
+        Parameters
+        ----------
+        old_name: str
+            name of the column to replace
+        new_name: str
+            name of the column to replace with
+
+        Raises
+        ------
+        KeyError
+            if old_name is not a column
+        """
+        if old_name not in self.columns:
+            raise KeyError(old_name)
+
+        # Rebuild the mapping in its existing order. Popping the old key and
+        # inserting the new one would move the renamed column to the end and
+        # shift the positions reported by iloc().
+        renamed = {(new_name if key == old_name else key): value
+                   for key, value in self.columns.items()}
+
+        # Update in place so this instance keeps holding the same dict object.
+        self.columns.clear()
+        self.columns.update(renamed)