Skip to content

Commit

Permalink
simplify AST building
Browse files Browse the repository at this point in the history
  • Loading branch information
InnocentBug committed Dec 17, 2024
1 parent 91c0466 commit 41f729e
Show file tree
Hide file tree
Showing 11 changed files with 171 additions and 219 deletions.
100 changes: 65 additions & 35 deletions src/gbigsmiles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,49 +15,79 @@
"Please make sure to install this module correctly via setuptools with setuptools_scm activated to generate a `_version.py` file."
) from exc

from .atom import Atom
from .bond import BondDescriptor
from .core import _GLOBAL_RNG, BigSMILESbase, reaction_graph_to_dot_string
from .distribution import Distribution, FlorySchulz, Gauss
from .exception import (
GBigSMILESError,
GBigSMILESInitNotEnoughError,
GBigSMILESInitTooMuchError,
GBigSMILESParsingError,
from .atom import (
AliphaticOrganic,
AromaticOrganic,
AromaticSymbol,
Atom,
AtomCharge,
AtomClass,
AtomSymbol,
BracketAtom,
Chiral,
HCount,
Isotope,
)
from .graph_generate import AtomGraph
from .mixture import Mixture
from .mol_prob import get_ensemble_prob
from .molecule import Molecule
from .stochastic import Stochastic
from .system import System
from .token import SmilesToken
from .transformer import GBigSMILESTransformer
from .util import camel_to_snake, snake_to_camel
from .bond import (
BondDescriptor,
BondDescriptorGeneration,
BondDescriptorSymbol,
BondDescriptorSymbolIdx,
BondSymbol,
InnerBondDescriptor,
RingBond,
SimpleBondDescriptor,
TerminalBondDescriptor,
)
from .core import BigSMILESbase
from .distribution import (
FlorySchulz,
Gauss,
LogNormal,
Poisson,
StochasticDistribution,
Uniform,
)
from .parser import get_global_parser
from .transformer import GBigSMILESTransformer, get_global_transformer
from .util import camel_to_snake, get_global_rng, snake_to_camel

# from .graph_generate import AtomGraph
# from .mixture import Mixture
# from .mol_prob import get_ensemble_prob
# from .molecule import Molecule
# from .stochastic import Stochastic
# from .system import System
# from .token import SmilesToken

__all__ = [
"__version__",
"version_tuple",
"Atom",
"BracketAtom",
"Isotope",
"AtomSymbol",
"Chiral",
"HCount",
"AtomCharge",
"AtomClass",
"AromaticSymbol",
"AliphaticOrganic",
"AromaticOrganic",
"BondSymbol",
"RingBond",
"BondDescriptorSymbol",
"BondDescriptorSymbolIdx",
"BondDescriptorGeneration",
"InnerBondDescriptor",
"BondDescriptor",
"_GLOBAL_RNG",
"SimpleBondDescriptor",
"TerminalBondDescriptor",
"BigSMILESbase",
"reaction_graph_to_dot_string",
"Distribution",
"FlorySchulz",
"Gauss",
"Atom",
"AtomGraph",
"Mixture",
"get_ensemble_prob",
"Molecule",
"Stochastic",
"System",
"SmilesToken",
"GBigSMILESError",
"GBigSMILESParsingError",
"GBigSMILESInitNotEnoughError",
"GBigSMILESInitTooMuchError",
"camel_to_snake",
"snake_to_camel",
"get_global_rng",
"GBigSMILESTransformer",
"get_global_transformer",
"get_global_parser",
]
6 changes: 3 additions & 3 deletions src/gbigsmiles/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import lark

from .core import BigSMILESbase
from .exception import GBigSMILESParsingError, GBigSMILESTooManyTokens
from .exception import ParsingError, TooManyTokens


class Atom(BigSMILESbase):
Expand Down Expand Up @@ -33,7 +33,7 @@ def __init__(self, children: list):
for child in self._children:
if isinstance(child, AtomSymbol):
if self._symbol is not None:
raise GBigSMILESTooManyTokens(self.__class__, self._symbol, child)
raise TooManyTokens(self.__class__, self._symbol, child)
self._symbol = child

def generate_string(self, extension):
Expand Down Expand Up @@ -294,7 +294,7 @@ def __init__(self, children: list):
super().__init__(children)

if str(self._children[0]) != "H":
raise GBigSMILESParsingError(self._children[0])
raise ParsingError(self._children[0])

if len(self._children) > 1:
self._count = int(self._children[1])
Expand Down
25 changes: 25 additions & 0 deletions src/gbigsmiles/bond.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
# Copyright (c) 2022: Ludwig Schneider
# See LICENSE for details

try:
from typing import Self
except ImportError:
from typing_extensions import Self

from .core import BigSMILESbase
from .parser import get_global_parser


def _create_compatible_bond_text(bond):
Expand Down Expand Up @@ -309,6 +314,12 @@ def transition(self):


class BondDescriptor(BigSMILESbase):
@classmethod
def make(cls, text: str) -> Self:
    """Forward ``text`` to the ``make`` of the matching descriptor class.

    Text containing ``$``, ``<`` or ``>`` is handed to
    ``SimpleBondDescriptor.make``; any other text is handed to
    ``TerminalBondDescriptor.make``.
    """
    has_bond_symbol = any(marker in text for marker in ("$", "<", ">"))
    target = SimpleBondDescriptor if has_bond_symbol else TerminalBondDescriptor
    return target.make(text)

@property
def symbol(self):
return None
Expand All @@ -334,6 +345,13 @@ def __init__(self, children):
if isinstance(child, InnerBondDescriptor):
self._inner_bond_descriptor = child

@classmethod
def make(cls, text: str) -> Self:
    """Run the base-class ``make`` implementation on ``text`` with ``cls`` as the class."""
    # We use BigSMILESbase.make.__func__ to get the underlying function of the class method,
    # then call it with cls as the first argument to ensure child typing.
    # We deliberately bypass the parent's make function, because it directs here.
    # NOTE(review): this comment originally named StochasticDistribution's make; presumably
    # BondDescriptor.make (which forwards to SimpleBondDescriptor.make /
    # TerminalBondDescriptor.make) was meant - confirm.
    return BigSMILESbase.make.__func__(cls, text)

def generate_string(self, extension):
return "[" + self._inner_bond_descriptor.generate_string(extension) + "]"

Expand Down Expand Up @@ -369,6 +387,13 @@ def __init__(self, children):
if isinstance(child, BondDescriptorGeneration):
self._generation = child

@classmethod
def make(cls, text: str) -> Self:
    """Run the base-class ``make`` implementation on ``text`` with ``cls`` as the class."""
    # We use BigSMILESbase.make.__func__ to get the underlying function of the class method,
    # then call it with cls as the first argument to ensure child typing.
    # We deliberately bypass the parent's make function, because it directs here.
    # NOTE(review): this comment originally named StochasticDistribution's make; presumably
    # BondDescriptor.make (which forwards to SimpleBondDescriptor.make /
    # TerminalBondDescriptor.make) was meant - confirm.
    return BigSMILESbase.make.__func__(cls, text)

@property
def weight(self):
return self._generation.weight
Expand Down
33 changes: 19 additions & 14 deletions src/gbigsmiles/data/g-bigsmiles.lark
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ bond_symbol: "-"
| "/"
| "\\"

ring_bond: bond_symbol? DIGIT
| bond_symbol? "%" DIGIT? DIGIT
ring_bond: bond_symbol? INT
| bond_symbol? "%" INT

_branched_atom: _atom_stand_in ring_bond* branch*

Expand Down Expand Up @@ -77,12 +77,18 @@ bond_descriptor: simple_bond_descriptor | ladder_bond_descriptor | non_covalent_
terminal_bond_descriptor: "[" bond_descriptor_symbol_idx? bond_descriptor_generation? "]"

stochastic_generation: "|" stochastic_distribution "|"
stochastic_distribution: "flory_schulz(" WS_INLINE* NUMBER WS_INLINE* ")"
| "schulz_zimm(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
| "gauss(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
| "uniform(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
| "log_normal(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
| "poisson(" WS_INLINE* NUMBER WS_INLINE* ")"
flory_schulz: "flory_schulz(" WS_INLINE* NUMBER WS_INLINE* ")"
schulz_zimm: "schulz_zimm(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
gauss: "gauss(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
uniform: "uniform(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
log_normal: "log_normal(" WS_INLINE* NUMBER WS_INLINE* "," WS_INLINE* NUMBER WS_INLINE* ")"
poisson: "poisson(" WS_INLINE* NUMBER WS_INLINE* ")"
stochastic_distribution: flory_schulz
| schulz_zimm
| gauss
| uniform
| log_normal
| poisson

_unary_index_operator: "!"
_binary_index_operator: "~" | "&"
Expand All @@ -94,10 +100,10 @@ _non_covalent_key_value_pair: WS_INLINE* "," WS_INLINE* _printable_character+ "=
_non_covalent_context: WS_INLINE* "|" WS_INLINE* _index_expression _non_covalent_key_value_pair*

h_count: "H"
| "H" DIGIT
| "H" INT

atom_charge: "-" DIGIT?
| "+" DIGIT?
atom_charge: "-" INT?
| "+" INT?
| "--"
| "++"

Expand All @@ -117,7 +123,6 @@ end_group.-1: ";" WS_INLINE* smiles _monomer_list*
%import common.INT
%import common.SIGNED_NUMBER
%import common.NUMBER
%import common.DIGIT
%import common.WS
%import common.WS_INLINE

Expand Down Expand Up @@ -152,8 +157,8 @@ chiral: "@"
| "@SP1"
| "@SP2"
| "@SP3"
| "@TB" DIGIT? DIGIT
| "@OH" DIGIT? DIGIT
| "@TB" INT
| "@OH" INT

_element_symbols: "H"
| "He"
Expand Down
46 changes: 12 additions & 34 deletions src/gbigsmiles/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class GBigSMILESError(Exception):
pass


class GBigSMILESParsingError(GBigSMILESError):
class ParsingError(GBigSMILESError):
"""
Parsing the Grammar went in an unanticipated manner.
Please report bug with input string.
Expand All @@ -24,39 +24,7 @@ def __str__(self):
return f"Unanticipated error while parsing. Please report and provide the input string. Token: {self.token} start: {self.token.start_pos}"


class GBigSMILESInitNotEnoughError(GBigSMILESError):
    """
    GBigSMILES classes usually need to be initialized either via text,
    or as part of parsing a different string.
    If this isn't followed, this exception is raised.
    Initialize the elements of G-BigSMILES with (part of) a G-BigSMILES string.
    """

    def __init__(self, class_name):
        # Stored so that __str__ can name the class in its message.
        self.class_name = class_name

    def __str__(self):
        return f"Attempt to initialize {self.class_name} without sufficient arguments. Initialize objects of {self.class_name} by passing (part of) a G-BigSMILES string."


class GBigSMILESInitTooMuchError(GBigSMILESError):
    """
    GBigSMILES classes usually need to be initialized either via text,
    or as part of parsing a different string, but not both.
    If this isn't followed, this exception is raised.
    Initialize the elements of G-BigSMILES with (part of) a G-BigSMILES string.
    """

    def __init__(self, class_name):
        # Stored so that __str__ can name the class in its message.
        self.class_name = class_name

    def __str__(self):
        return f"Attempt to initialize {self.class_name} with tree and text arguments. Initialize objects of {self.class_name} by passing (part of) a G-BigSMILES string."


class GBigSMILESTooManyTokens(GBigSMILESError):
class TooManyTokens(ParsingError):
def __init__(self, class_name, existing_token, new_token):
self.class_name = class_name
self.existing_token = existing_token
Expand All @@ -67,3 +35,13 @@ def __str__(self):
string += f"The existing token is {self.existing_token} which conflicts with the new "
string += f"token {self.new_token}. Most likely in implementation error, please report."
return string


class UnknownDistribution(GBigSMILESError):
    """Error reporting that a distribution text is unknown.

    The offending text is kept in ``distribution_text`` and is included in
    the message produced by ``__str__``.
    """

    def __init__(self, distribution_text: str):
        # Forward the text to the base exception so that ``args`` is populated.
        # With empty ``args`` the exception cannot be rebuilt by pickle/copy
        # (the required argument would be missing) and ``repr`` omits the text.
        super().__init__(distribution_text)
        self.distribution_text = distribution_text

    def __str__(self):
        string = f"GBigSMILES a distribution with the following text {self.distribution_text} is unknown."
        string += " Typo or not implemented distribution."
        return string
5 changes: 3 additions & 2 deletions src/gbigsmiles/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
import networkx as nx

from .bond import _create_compatible_bond_text
from .core import _GLOBAL_RNG, BigSMILESbase
from .core import BigSMILESbase
from .mixture import Mixture
from .stochastic import Stochastic
from .stochastic_atom_graph import StochasticAtomGraph
from .token import SmilesToken
from .util import get_global_rng


class Molecule(BigSMILESbase):
Expand Down Expand Up @@ -144,7 +145,7 @@ def generate_string(self, extension):
string += self.mixture.generate_string(extension)
return string

def generate(self, prefix=None, rng=_GLOBAL_RNG):
def generate(self, prefix=None, rng=get_global_rng()):
my_mol = prefix
for element in self._elements:
my_mol = element.generate(my_mol, rng)
Expand Down
16 changes: 15 additions & 1 deletion src/gbigsmiles/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def _make_parser(filename=None, start_tokens=None):
"ladder_bond_descriptor",
"non_covalent_bond_descriptor",
"bond_descriptor",
"simple_bond_descriptor",
"terminal_bond_descriptor",
"stochastic_generation",
"stochastic_distribution",
Expand All @@ -41,9 +42,22 @@ def _make_parser(filename=None, start_tokens=None):
"atom_symbol",
"aromatic_symbol",
"bracket_atom",
"flory_schulz",
"uniform",
"schulz_zimm",
"log_normal",
"gauss",
]
parser = Lark(rf"{grammar_text}", start=start_tokens, keep_all_tokens=True)
return parser


_GLOBAL_PARSER = _make_parser()
_GLOBAL_PARSER: None | Lark = None


def get_global_parser():
    """Return the module-wide parser, building it on first use.

    The first call creates the parser with ``_make_parser()`` and stores it in
    ``_GLOBAL_PARSER``; later calls return that stored object.
    """
    global _GLOBAL_PARSER
    parser = _GLOBAL_PARSER
    if parser is None:
        # First call: build the parser once and remember it.
        parser = _make_parser()
        _GLOBAL_PARSER = parser
    return parser
Loading

0 comments on commit 41f729e

Please sign in to comment.