From baf1ddadf1cd1dcc38e76dee986542ae3108b04b Mon Sep 17 00:00:00 2001 From: Marcel Mueller Date: Mon, 16 Dec 2024 11:25:38 +0100 Subject: [PATCH] `fixed_composition` feature (#95) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add 'fixed_composition' option; push 'check_config' into config-specific functions; minor adjustments Signed-off-by: Marcel Müller * update CHANGELOG.md Signed-off-by: Marcel Müller --------- Signed-off-by: Marcel Müller --- CHANGELOG.md | 2 + README.md | 6 +- mindlessgen.toml | 3 + src/mindlessgen/cli/cli_parser.py | 7 ++ .../molecules/generate_molecule.py | 99 ++++----------- src/mindlessgen/prog/config.py | 119 +++++++++++++++--- test/test_generate/test_generate_molecule.py | 52 +++++--- 7 files changed, 182 insertions(+), 106 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77760b1..93421ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - clearer differentiation between the distinct scaling factors for the van der Waals radii - `README.md` with more detailed explanation of the element composition function - Default `max_cycles` for the generation & refinement set to 200 +- Allow fixed molecule compositions in a simpler way +- `check_config` now ConfigClass-specific ### Fixed - unit conversion for (currenly unused) vdW radii from the original Fortran project diff --git a/README.md b/README.md index f51cd16..1279950 100644 --- a/README.md +++ b/README.md @@ -87,12 +87,14 @@ If the path is not specified with `-c/--config`, `mindlessgen.toml` will be sear If neither a corresponding CLI command nor an entry in the configuration file is provided, the default values are used. The active configuration, including the default values, can be printed using `--print-config`. -### Element composition +#### Element composition There are two related aspects of the element composition: 1. **Which elements** should occur within the generated molecule? 2. **How many atoms** of the specified element should occur? - **Example 1**: `C:1-3, O:1-1, H:1-*` would result in a molecule with 1, 2, or 3 carbon atoms, exactly 1 oxygen atom, and between 1 and an undefined number of hydrogen atoms (i.e., at least 1). -- **Example 2**: `Na:10-10, In:10-10, O:20-20`. This example would result in a molecule with exactly 10 sodium atoms, 10 indium atoms, and 20 oxygen atoms. **For a fixed element composition, the number of atoms (40) has to be within the min_num_atoms and max_num_atom interval.** `mindlessgen` will consequently always return a molecule with exactly 40 atoms. +- **Example 2**: `Na:10-10, In:10-10, O:20-20`. This example would result in a molecule with exactly 10 sodium atoms, 10 indium atoms, and 20 oxygen atoms. +For fixing the whole molecule to this composition, set `fixed_composition` to `true`. +`mindlessgen` will consequently always return a molecule with exactly 40 atoms. > [!WARNING] > When using `orca` and specifying elements with `Z > 86`, ensure that the basis set you've selected is compatible with (super-)heavy elements like actinides. diff --git a/mindlessgen.toml b/mindlessgen.toml index 8ebf861..8151b53 100644 --- a/mindlessgen.toml +++ b/mindlessgen.toml @@ -38,6 +38,9 @@ contract_coords = true # > Elements that are not specified are only added by random selection. # > A star sign (*) can be used as a wildcard for integer value. element_composition = "C:2-3, H:1-2, O:1-2, N:1-*" +# > Set an exactly defined composition +# > CAUTION: Only possible if 'element_composition' is set to defined values. Example: "C:3-3, H:8-8, O:1-1" +fixed_composition = false # > Atom types that are not chosen for random selection. Format: ", , ..." # > CAUTION: This option is overridden by the 'element_composition' option. # > I.e., if an element is specified in 'element_composition' with an occurrence > 0, it will be added to the molecule anyway. diff --git a/src/mindlessgen/cli/cli_parser.py b/src/mindlessgen/cli/cli_parser.py index e68ef8d..12f02e2 100644 --- a/src/mindlessgen/cli/cli_parser.py +++ b/src/mindlessgen/cli/cli_parser.py @@ -149,6 +149,12 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: required=False, help="Define the charge of the molecule.", ) + parser.add_argument( + "--fixed-composition", + type=bool, + required=False, + help="Fix the element composition of the molecule.", + ) ### Refinement arguments ### parser.add_argument( @@ -287,6 +293,7 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict: "scale_minimal_distance": args_dict["scale_minimal_distance"], "contract_coords": args_dict["contract_coords"], "molecular_charge": args_dict["molecular_charge"], + "fixed_composition": args_dict["fixed_composition"], } # XTB specific arguments rev_args_dict["xtb"] = {"xtb_path": args_dict["xtb_path"]} diff --git a/src/mindlessgen/molecules/generate_molecule.py b/src/mindlessgen/molecules/generate_molecule.py index 13a0ffd..a83e584 100644 --- a/src/mindlessgen/molecules/generate_molecule.py +++ b/src/mindlessgen/molecules/generate_molecule.py @@ -82,83 +82,34 @@ def generate_atom_list(cfg: GenerateConfig, verbosity: int = 1) -> np.ndarray: else: valid_elems = set_all_elem - natoms = np.zeros( - 103, dtype=int - ) # 103 is the number of accessible elements in the periodic table - - # Some sanity checks: - # - Check if the minimum number of atoms is smaller than the maximum number of atoms - if cfg.min_num_atoms is not None and cfg.max_num_atoms is not None: - if cfg.min_num_atoms > cfg.max_num_atoms: - raise ValueError( - "The minimum number of atoms is larger than the maximum number of atoms." - ) + natoms = np.zeros(103, dtype=int) # Support for up to element 103 (Lr) - # - Check if the summed number of minimally required atoms from cfg.element_composition - # is larger than the maximum number of atoms - if cfg.max_num_atoms is not None: - if ( - np.sum( - [ - cfg.element_composition.get(i, (0, 0))[0] - for i in cfg.element_composition - ] + if cfg.fixed_composition: + # Set the number of atoms for the fixed composition + for elem, count_range in cfg.element_composition.items(): + natoms[elem] = count_range[0] + if verbosity > 1: + print( + "Setting the number of atoms for the fixed composition. " + + f"Returning: \n{natoms}\n" ) - > cfg.max_num_atoms - ): - raise ValueError( - "The summed number of minimally required atoms " - + "from the fixed composition is larger than the maximum number of atoms." + # If the molecular charge is defined, and a fixed element composition is defined, check if the electrons are even. If not raise an error. + if cfg.molecular_charge: + protons = calculate_protons(natoms) + nel = protons - cfg.molecular_charge + f_elem = any( + count > 0 and (i in get_lanthanides() or i in get_actinides()) + for i, count in enumerate(natoms) ) - fixed_elem = False - # - Check if all defintions in cfg.element_composition - # are completely fixed (min and max are equal) - for elem, count_range in cfg.element_composition.items(): - # check if for all entries: min and max are both not None, and if min and max are equal. - # If both is true, set a boolean to True - if ( - count_range[0] is not None - and count_range[1] is not None - and count_range[0] == count_range[1] - ): - fixed_elem = True - else: - fixed_elem = False - break - # If the boolean is True, check if the summed number of fixed atoms - # is within the defined overall limits - if fixed_elem: - sum_fixed_atoms = np.sum( - [cfg.element_composition.get(i, (0, 0))[0] for i in cfg.element_composition] - ) - if cfg.min_num_atoms <= sum_fixed_atoms <= cfg.max_num_atoms: - # If the fixed composition is within the defined limits, - # set the number of atoms for the fixed composition - for elem, count_range in cfg.element_composition.items(): - natoms[elem] = count_range[0] - if verbosity > 1: - print( - "Fixed composition is within the defined limits. " - + "Setting the number of atoms for the fixed composition. " - + f"Returning: \n{natoms}\n" - ) - # If the molecular charge is defined, and a fixed element composition is defined, check if the electrons are even. If not raise an error. - if cfg.molecular_charge: - protons = calculate_protons(natoms) - nel = protons - cfg.molecular_charge - f_elem = any( - count > 0 and (i in get_lanthanides() or i in get_actinides()) - for i, count in enumerate(natoms) + if (f_elem and calculate_ligand_electrons(natoms, nel) % 2 != 0) or ( + not f_elem and nel % 2 != 0 + ): + raise ValueError( + "Both fixed charge and fixed composition are defined. " + + "Please only define one of them." + + "Or ensure that the fixed composition is closed shell." ) - if (f_elem and calculate_ligand_electrons(natoms, nel) % 2 != 0) or ( - not f_elem and nel % 2 != 0 - ): - raise SystemExit( - "Both fixed charge and fixed composition are defined. " - + "Please only define one of them." - + "Or ensure that the fixed composition is closed shell." - ) - return natoms + return natoms # Reasoning for the parameters in the following sections: # - The number of the atoms added by default (DefaultRandom + AddOrganicAtoms) @@ -524,7 +475,7 @@ def fixed_charge_elem_correction( if verbosity > 1: print(f"Adding atom type {random_elem} for charge...") return natoms - elif natoms[random_elem] > min_count and num_atoms > cfg.min_num_atoms: + if natoms[random_elem] > min_count and num_atoms > cfg.min_num_atoms: natoms[random_elem] -= 1 if verbosity > 1: print(f"Removing atom type {random_elem} for charge...") diff --git a/src/mindlessgen/prog/config.py b/src/mindlessgen/prog/config.py index 2edea7a..d598b4a 100644 --- a/src/mindlessgen/prog/config.py +++ b/src/mindlessgen/prog/config.py @@ -8,6 +8,8 @@ from abc import ABC, abstractmethod import warnings import multiprocessing as mp + +import numpy as np import toml from ..molecules import PSE_NUMBERS @@ -167,6 +169,24 @@ def write_xyz(self, write_xyz: bool): raise TypeError("Write xyz should be a boolean.") self._write_xyz = write_xyz + def check_config(self, verbosity: int = 1) -> None: + ### GeneralConfig checks ### + # lower number of the available cores and the configured parallelism + num_cores = min(mp.cpu_count(), self.parallel) + if self.parallel > mp.cpu_count() and verbosity > -1: + warnings.warn( + f"Number of cores requested ({self.parallel}) is greater " + + f"than the number of available cores ({mp.cpu_count()})." + + f"Using {num_cores} cores instead." + ) + + if num_cores > 1 and verbosity > 0: + # raise warning that parallelization will disable verbosity + warnings.warn( + "Parallelization will disable verbosity during iterative search. " + + "Set '--verbosity 0' or '-P 1' to avoid this warning, or simply ignore it." + ) + class GenerateConfig(BaseConfig): """ @@ -184,6 +204,7 @@ def __init__(self: GenerateConfig) -> None: self._scale_minimal_distance: float = 0.8 self._contract_coords: bool = True self._molecular_charge: int | None = None + self._fixed_composition: bool = False def get_identifier(self) -> str: return "generate" @@ -484,6 +505,80 @@ def molecular_charge(self, molecular_charge: str | int): else: raise TypeError("Molecular charge should be a string or an integer.") + @property + def fixed_composition(self): + """ + Get the fixed_composition flag. + """ + return self._fixed_composition + + @fixed_composition.setter + def fixed_composition(self, fixed_composition: bool): + """ + Set the fixed_composition flag. + """ + if not isinstance(fixed_composition, bool): + raise TypeError("Fixed composition should be a boolean.") + self._fixed_composition = fixed_composition + + def check_config(self, verbosity: int = 1) -> None: + """ + GenerateConfig checks for any incompatibilities that are imaginable + """ + # - Check if the minimum number of atoms is smaller than the maximum number of atoms + if self.min_num_atoms is not None and self.max_num_atoms is not None: + if self.min_num_atoms > self.max_num_atoms: + raise ValueError( + "The minimum number of atoms is larger than the maximum number of atoms." + ) + # - Check if the summed number of minimally required atoms from cfg.element_composition + # is larger than the maximum number of atoms + if self.max_num_atoms is not None: + if ( + np.sum( + [ + self.element_composition.get(i, (0, 0))[0] + if self.element_composition.get(i, (0, 0))[0] is not None + else 0 + for i in self.element_composition + ] + ) + > self.max_num_atoms + ): + raise ValueError( + "The summed number of minimally required atoms " + + "from the fixed composition is larger than the maximum number of atoms." + ) + if self.fixed_composition: + # - Check if all defintions in cfg.element_composition + # are completely fixed (min and max are equal) + for elem, count_range in self.element_composition.items(): + # check if for all entries: min and max are both not None, and if min and max are equal. + if ( + (count_range[0] is None) + or (count_range[1] is None) + or (count_range[0] != count_range[1]) + ): + raise ValueError( + f"Element {elem} is not completely fixed in the element composition. " + + "Usage together with fixed_composition is not possible." + ) + # Check if the summed number of fixed atoms + # is within the defined overall limits + sum_fixed_atoms = np.sum( + [ + self.element_composition.get(i, (0, 0))[0] + for i in self.element_composition + ] + ) + if self.min_num_atoms is not None and not ( + self.min_num_atoms <= sum_fixed_atoms + ): + raise ValueError( + "The summed number of fixed atoms from the fixed composition " + + "is not within the range of min_num_atoms and max_num_atoms." + ) + class RefineConfig(BaseConfig): """ @@ -824,15 +919,15 @@ def check_config(self, verbosity: int = 1) -> None: Checks ConfigClass for any incompatibilities that are imaginable """ - # lower number of the available cores and the configured parallelism - num_cores = min(mp.cpu_count(), self.general.parallel) - if self.general.parallel > mp.cpu_count() and verbosity > -1: - warnings.warn( - f"Number of cores requested ({self.general.parallel}) is greater " - + f"than the number of available cores ({mp.cpu_count()})." - + f"Using {num_cores} cores instead." - ) + ### Config-specific checks ### + for attr_name in dir(self): + attr_value = getattr(self, attr_name) + if isinstance(attr_value, BaseConfig): + if hasattr(attr_value, "check_config"): + attr_value.check_config(verbosity) + ### Overlapping checks ### + num_cores = min(mp.cpu_count(), self.general.parallel) if num_cores > 1 and self.postprocess.debug and verbosity > -1: # raise warning that debugging of postprocessing will disable parallelization warnings.warn( @@ -840,14 +935,6 @@ def check_config(self, verbosity: int = 1) -> None: + "with possibly similar errors in parallel mode. " + "Don't be confused!" ) - - if num_cores > 1 and verbosity > 0: - # raise warning that parallelization will disable verbosity - warnings.warn( - "Parallelization will disable verbosity during iterative search. " - + "Set '--verbosity 0' or '-P 1' to avoid this warning, or simply ignore it." - ) - if self.refine.engine == "xtb": # Check for f-block elements in forbidden elements if self.generate.forbidden_elements: diff --git a/test/test_generate/test_generate_molecule.py b/test/test_generate/test_generate_molecule.py index 8af3623..a020536 100644 --- a/test/test_generate/test_generate_molecule.py +++ b/test/test_generate/test_generate_molecule.py @@ -110,8 +110,9 @@ def test_generate_atom_list_with_composition( if expected_error: with pytest.raises(expected_error): - generate_atom_list(config, verbosity=1) + config.check_config() else: + config.check_config() atom_list = generate_atom_list(config, verbosity=1) for elem, count in expected_atom_counts.items(): if isinstance(count, tuple): @@ -295,15 +296,6 @@ def test_check_distances(xyz, ati, scale_minimal_bondlength, expected, descripti assert check_distances(xyz, ati, scale_minimal_bondlength) == expected -def test_generate_atom_list_min_larger_than_max(default_generate_config): - """Test the generate_atom_list function when the minimum number of atoms is larger than the maximum number of atoms.""" - default_generate_config.min_num_atoms = 10 - default_generate_config.max_num_atoms = 5 - - with pytest.raises(ValueError): - generate_atom_list(default_generate_config, verbosity=1) - - # Test to ensure non-integer values for min/max atoms raise errors @pytest.mark.parametrize( "min_atoms, max_atoms, expected_error", @@ -344,6 +336,37 @@ def test_generate_atom_list_with_overlapping_forbidden_elements( assert np.sum([atom_list[z] for z in expected_atoms]) == 0 +# Test fixed composition with varying min/max atoms and element compositions +@pytest.mark.parametrize( + "min_atoms, max_atoms, element_composition, fixed_composition, should_raise", + [ + (10, 5, "C:3-3, H:3-3", False, True), + (5, 10, "C:5-10, H:6-10", False, True), + (5, 10, "C:5-10, H:6-6", True, True), + (5, 10, "C:5-5, H:6-6", True, True), + (5, 10, "C:2-5, H:3-5", False, False), + (5, 10, "C:3-3, H:3-3", True, False), + (5, 10, "C:*-3, H:2-*", True, True), + ], +) +def test_check_config_variations( + min_atoms, max_atoms, element_composition, fixed_composition, should_raise +): + config = GenerateConfig() + config.min_num_atoms = min_atoms + config.max_num_atoms = max_atoms + config.element_composition = element_composition + config.fixed_composition = fixed_composition + if should_raise: + with pytest.raises(ValueError): + config.check_config() + else: + try: + config.check_config() + except ValueError: + pytest.fail("check_config() raised ValueError unexpectedly!") + + # Test behavior when composition is empty but min/max are set def test_generate_atom_list_with_empty_composition(default_generate_config): """Ensure empty compositions don't lead to unexpected behaviors.""" @@ -433,14 +456,15 @@ def test_fixed_charge_and_fixed_composition( ): """Test the hydrogen correction for a fixed charge""" default_generate_config.molecular_charge = "3" - default_generate_config.min_num_atoms = 5 - default_generate_config.max_num_atoms = 10 + default_generate_config.fixed_composition = True default_generate_config.element_composition = "H:5-5, C:2-2, N:1-1, O:1-1" # Check if the right system exit is raised with pytest.raises( - SystemExit, - match="Both fixed charge and fixed composition are defined. Please only define one of them.Or ensure that the fixed composition is closed shell.", + ValueError, + match="Both fixed charge and fixed composition are defined. " + + "Please only define one of them." + + "Or ensure that the fixed composition is closed shell.", ): generate_random_molecule(default_generate_config, verbosity=1)