Source code for FOX.io._read_top

"""A class for reading and GROMACS .top topology files.

Index
-----
.. currentmodule:: FOX
.. autosummary::
    TOPContainer
    TOPContainer.from_file
    TOPContainer.to_file
    TOPContainer.allclose
    TOPContainer.generate_pairs
    TOPContainer.generate_pairs_nb
    TOPContainer.copy
    TOPContainer.concatenate

API
---
.. autoclass:: TOPContainer
    :noindex:
    :members: defaults, atomtypes, pairtypes, bondtypes, angletypes, dihedraltypes,
        constrainttypes, nonbond_params, moleculetype, atoms, pairs, bonds, angles,
        dihedrals, system, molecules, DF_DTYPES, DF_DICT_DTYPES

.. automethod:: TOPContainer.from_file
.. automethod:: TOPContainer.to_file
.. automethod:: TOPContainer.allclose
.. automethod:: TOPContainer.generate_pairs
.. automethod:: TOPContainer.generate_pairs_nb
.. automethod:: TOPContainer.copy
.. autoattribute:: TOPContainer.concatenate

"""

from __future__ import annotations

import os
import copy
import types
import textwrap
import pprint
import warnings
from collections import defaultdict
from collections.abc import Callable, Mapping
from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, Literal, cast

import h5py
import numpy as np
import pandas as pd

from . import FileIter
from ._top_concat import _TOPConcat
from ..ff import degree_of_separation

if TYPE_CHECKING:
    from typing_extensions import Self
    from numpy.typing import NDArray

    _T = TypeVar("_T")
    _DFType = TypeVar("_DFType", bound=pd.DataFrame | dict[int, pd.DataFrame])

    _DFNames = Literal[
        "defaults",
        "atomtypes",
        "moleculetype",
        "atoms",
        "system",
        "molecules",
        "pairs",
        "pairs_nb",
        "bonds",
        "angles",
        "dihedrals",
    ]
    _DFDictNames = Literal[
        "bondtypes",
        "pairtypes",
        "angletypes",
        "dihedraltypes",
        "constrainttypes",
        "nonbond_params",
    ]
    _DirectiveNames = _DFNames | _DFDictNames

    _DtypeMap = types.MappingProxyType[_T, np.dtype[np.void]]
    _DFKwargs = dict[_DFNames, pd.DataFrame]
    _DFDictKwargs = defaultdict[_DFDictNames, dict[int, pd.DataFrame]]

__all__ = ["TOPContainer", "TOPDirectiveWarning"]


class TOPDirectiveWarning(Warning):
    """Class for warnings related to .top directives."""


def _df_allclose(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    rtol: float,
    atol: float,
    equal_nan: bool,
) -> bool:
    if (
        df1.shape != df2.shape
        or not np.all(df1.columns == df2.columns)
        or not np.all(df1.index == df2.index)
    ):
        return False

    for k, series1 in df1.items():
        series2 = df2[k]
        if np.issubdtype(series1.dtype, np.inexact):
            if not np.allclose(series1, series2, rtol, atol, equal_nan):
                return False
        elif not np.array_equal(series1, series2):
            return False
    return True


_DF_DICT_DTYPES: _DtypeMap[str] = types.MappingProxyType({
    "pairtypes_1": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("sigma", "f8"),
        ("epsilon", "f8"),
    ]),
    "pairtypes_2": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("fudgeQQ", "f8"),
        ("qi", "f8"),
        ("qj", "f8"),
        ("sigma", "f8"),
        ("epsilon", "f8"),
    ]),
    "bondtypes_1_2_6_7": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("b0", "f8"),
        ("k", "f8"),
    ]),
    "bondtypes_3": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("b0", "f8"),
        ("D", "f8"),
        ("beta", "f8"),
    ]),
    "bondtypes_4": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("b0", "f8"),
        ("C", "f8"),
    ]),
    "bondtypes_5": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
    ]),
    "bondtypes_8_9": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("table_num", "i8"),
        ("k", "f8"),
    ]),
    "bondtypes_10": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("low", "f8"),
        ("up", "f8"),
        ("k", "f8"),
    ]),
    "angletypes_1": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("theta", "f8"),
        ("ktheta", "f8"),
        ("ub0", "f8"),
        ("kub", "f8"),
    ]),
    "angletypes_1_2_10": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("theta", "f8"),
        ("k", "f8"),
    ]),
    "angletypes_3": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("r1", "f8"),
        ("r2", "f8"),
        ("k", "f8"),
    ]),
    "angletypes_4": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("r1", "f8"),
        ("r2", "f8"),
        ("r3", "f8"),
        ("k", "f8"),
    ]),
    "angletypes_5": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("theta", "f8"),
        ("ktheta", "f8"),
        ("ub0", "f8"),
        ("kub", "f8"),
    ]),
    "angletypes_6": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("theta", "f8"),
        ("C", "f8"),
    ]),
    "angletypes_8": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("table_num", "i8"),
        ("k", "f8"),
    ]),
    "angletypes_9": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("func", "i8"),
        ("a", "f8"),
        ("k", "f8"),
    ]),
    "dihedraltypes_1_4_9": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("atom4", "U5"),
        ("func", "i8"),
        ("phi0", "f8"),
        ("k", "f8"),
        ("n", "i8"),
    ]),
    "dihedraltypes_2": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("atom4", "U5"),
        ("func", "i8"),
        ("xi0", "f8"),
        ("k", "f8"),
    ]),
    "dihedraltypes_3": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("atom4", "U5"),
        ("func", "i8"),
        ("C0", "f8"),
        ("C1", "f8"),
        ("C2", "f8"),
        ("C3", "f8"),
        ("C4", "f8"),
        ("C5", "f8"),
    ]),
    "dihedraltypes_5": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("atom4", "U5"),
        ("func", "i8"),
        ("C1", "f8"),
        ("C2", "f8"),
        ("C3", "f8"),
        ("C4", "f8"),
        ("C5", "f8"),
    ]),
    "dihedraltypes_8": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("atom4", "U5"),
        ("func", "i8"),
        ("table_num", "i8"),
        ("k", "f8"),
    ]),
    "dihedraltypes_10": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("atom4", "U5"),
        ("func", "i8"),
        ("phi0", "f8"),
        ("k", "f8"),
    ]),
    "dihedraltypes_11": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("atom3", "U5"),
        ("atom4", "U5"),
        ("func", "i8"),
        ("k", "f8"),
        ("a0", "f8"),
        ("a1", "f8"),
        ("a2", "f8"),
        ("a3", "f8"),
    ]),
    "constraints_1_2": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
    ]),
    "nonbond_params_1": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("sigma", "f8"),
        ("epsilon", "f8"),
    ]),
    "nonbond_params_2": np.dtype([
        ("atom1", "U5"),
        ("atom2", "U5"),
        ("func", "i8"),
        ("a", "f8"),
        ("b", "f8"),
        ("c", "f8"),
    ]),
})


[docs]class TOPContainer: """A class for managing GROMACS .top topology files. Examples -------- .. code:: python >>> from FOX import TOPContainer >>> input_file: str = ... >>> output_file: str = ... >>> top = TOPContainer.from_file(input_file) >>> top.to_file(output_file) """ __slots__ = ( "__weakref__", "defaults", "atomtypes", "pairtypes", "bondtypes", "angletypes", "dihedraltypes", "constrainttypes", "nonbond_params", "moleculetype", "atoms", "pairs", "pairs_nb", "bonds", "angles", "dihedrals", "system", "molecules", "_concatenate", ) # parameter level #: A dataframe holding the ``defaults`` directive. defaults: pd.DataFrame #: A dataframe holding the ``atomtypes`` directive. atomtypes: pd.DataFrame #: A dictionary of dataframes holding the ``bondtypes`` directive. bondtypes: dict[int, pd.DataFrame] #: A dictionary of dataframes holding the ``pairtypes`` directive. pairtypes: dict[int, pd.DataFrame] #: A dictionary of dataframes holding the ``angletypes`` directive. angletypes: dict[int, pd.DataFrame] #: A dictionary of dataframes holding the ``dihedraltypes`` directive. dihedraltypes: dict[int, pd.DataFrame] #: A dictionary of dataframes holding the ``constrainttypes`` directive. constrainttypes: dict[int, pd.DataFrame] #: A dictionary of dataframes holding the ``nonbond_params`` directive. nonbond_params: dict[int, pd.DataFrame] # molecule level #: A dataframe holding the ``moleculetype`` directive. moleculetype: pd.DataFrame #: A dataframe holding the ``atoms`` directive. atoms: pd.DataFrame #: A dataframe holding the ``pairs`` directive. pairs: pd.DataFrame #: A dataframe holding the ``pairs_nb`` directive. pairs_nb: pd.DataFrame #: A dataframe holding the ``bonds`` directive. bonds: pd.DataFrame #: A dataframe holding the ``angles`` directive. angles: pd.DataFrame #: A dataframe holding the ``dihedrals`` directive. dihedrals: pd.DataFrame # system level #: A dataframe holding the ``system`` directive. system: pd.DataFrame #: A dataframe holding the ``molecules`` directive. molecules: pd.DataFrame @property def concatenate(self) -> _TOPConcat: """Namespace with functions for adding new directive-specific rows. .. currentmodule:: FOX.io._top_concat._TOPConcat .. autofunction:: atomtypes .. autofunction:: nonbond_params .. autofunction:: atoms .. autofunction:: pairs .. autofunction:: pairs_nb """ return self._concatenate #: A mapping holding the data types of all mandatory directives. DF_DTYPES: ClassVar[_DtypeMap[_DFNames]] = types.MappingProxyType({ "defaults": np.dtype([ ("nbfunc", "i8"), ("comb_rule", "i8"), ("gen_pairs", "U3"), ("fudgeLJ", "f8"), ("fudgeQQ", "f8"), ]), "atomtypes": np.dtype([ ("atom_type", "U5"), ("atnum", "i8"), ("mass", "f8"), ("charge", "f8"), ("particle_type", "U1"), ("sigma", "f8"), ("epsilon", "f8"), ]), "moleculetype": np.dtype([ ("molecule", "O"), ("n_rexcl", "i8"), ]), "atoms": np.dtype([ ("molecule", "O"), ("atom1", "i8"), ("atom_type", "U5"), ("res_num", "i8"), ("res_name", "U5"), ("atom_name", "U5"), ("charge_group", "i8"), ("charge", "f8"), ("mass", "f8"), ]), "system": np.dtype([ ("name", "O"), ]), "molecules": np.dtype([ ("molecule", "O"), ("n_mol", "i8"), ]), "bonds": np.dtype([ ("molecule", "O"), ("atom1", "i8"), ("atom2", "i8"), ("func", "i8"), ]), "angles": np.dtype([ ("molecule", "O"), ("atom1", "i8"), ("atom2", "i8"), ("atom3", "i8"), ("func", "i8"), ]), "dihedrals": np.dtype([ ("molecule", "O"), ("atom1", "i8"), ("atom2", "i8"), ("atom3", "i8"), ("atom4", "i8"), ("func", "i8"), ]), "pairs": np.dtype([ ("molecule", "O"), ("atom1", "i8"), ("atom2", "i8"), ("func", "i8"), ]), "pairs_nb": np.dtype([ ("molecule", "O"), ("atom1", "i8"), ("atom2", "i8"), ("func", "i8"), ]), }) #: A mapping holding the data types of all optional (dictionary of dataframe based) directives. DF_DICT_DTYPES: ClassVar[ types.MappingProxyType[_DFDictNames, _DtypeMap[int]] ] = types.MappingProxyType({ "pairtypes": types.MappingProxyType({ 1: _DF_DICT_DTYPES["pairtypes_1"], 2: _DF_DICT_DTYPES["pairtypes_2"], }), "bondtypes": types.MappingProxyType({ 1: _DF_DICT_DTYPES["bondtypes_1_2_6_7"], 2: _DF_DICT_DTYPES["bondtypes_1_2_6_7"], 3: _DF_DICT_DTYPES["bondtypes_3"], 4: _DF_DICT_DTYPES["bondtypes_4"], 5: _DF_DICT_DTYPES["bondtypes_5"], 6: _DF_DICT_DTYPES["bondtypes_1_2_6_7"], 7: _DF_DICT_DTYPES["bondtypes_1_2_6_7"], 8: _DF_DICT_DTYPES["bondtypes_8_9"], 9: _DF_DICT_DTYPES["bondtypes_8_9"], 10: _DF_DICT_DTYPES["bondtypes_10"], }), "angletypes": types.MappingProxyType({ 1: _DF_DICT_DTYPES["angletypes_1_2_10"], 2: _DF_DICT_DTYPES["angletypes_1_2_10"], 3: _DF_DICT_DTYPES["angletypes_3"], 4: _DF_DICT_DTYPES["angletypes_4"], 5: _DF_DICT_DTYPES["angletypes_5"], 6: _DF_DICT_DTYPES["angletypes_6"], 8: _DF_DICT_DTYPES["angletypes_8"], 9: _DF_DICT_DTYPES["angletypes_9"], 10: _DF_DICT_DTYPES["angletypes_1_2_10"], }), "dihedraltypes": types.MappingProxyType({ 1: _DF_DICT_DTYPES["dihedraltypes_1_4_9"], 2: _DF_DICT_DTYPES["dihedraltypes_2"], 3: _DF_DICT_DTYPES["dihedraltypes_3"], 4: _DF_DICT_DTYPES["dihedraltypes_1_4_9"], 5: _DF_DICT_DTYPES["dihedraltypes_5"], 8: _DF_DICT_DTYPES["dihedraltypes_8"], 9: _DF_DICT_DTYPES["dihedraltypes_1_4_9"], 10: _DF_DICT_DTYPES["dihedraltypes_10"], 11: _DF_DICT_DTYPES["dihedraltypes_11"], }), "constrainttypes": types.MappingProxyType({ 1: _DF_DICT_DTYPES["constraints_1_2"], 2: _DF_DICT_DTYPES["constraints_1_2"], }), "nonbond_params": types.MappingProxyType({ 1: _DF_DICT_DTYPES["nonbond_params_1"], 2: _DF_DICT_DTYPES["nonbond_params_2"], }), }) def __init__( self, *, defaults: None | pd.DataFrame = None, atomtypes: None | pd.DataFrame = None, moleculetype: None | pd.DataFrame = None, atoms: None | pd.DataFrame = None, system: None | pd.DataFrame = None, molecules: None | pd.DataFrame = None, bondtypes: None | dict[int, pd.DataFrame] = None, pairtypes: None | dict[int, pd.DataFrame] = None, angletypes: None | dict[int, pd.DataFrame] = None, dihedraltypes: None | dict[int, pd.DataFrame] = None, constrainttypes: None | dict[int, pd.DataFrame] = None, nonbond_params: None | dict[int, pd.DataFrame] = None, pairs: None | pd.DataFrame = None, pairs_nb: None | pd.DataFrame = None, bonds: None | pd.DataFrame = None, angles: None | pd.DataFrame = None, dihedrals: None | pd.DataFrame = None, ) -> None: """Initialize the instance.""" self.defaults = self._validate_attr(defaults, "defaults") self.atomtypes = self._validate_attr(atomtypes, "atomtypes") self.bondtypes = self._validate_attr(bondtypes, "bondtypes") self.pairtypes = self._validate_attr(pairtypes, "pairtypes") self.angletypes = self._validate_attr(angletypes, "angletypes") self.dihedraltypes = self._validate_attr(dihedraltypes, "dihedraltypes") self.constrainttypes = self._validate_attr(constrainttypes, "constrainttypes") self.nonbond_params = self._validate_attr(nonbond_params, "nonbond_params") self.moleculetype = self._validate_attr(moleculetype, "moleculetype") self.atoms = self._validate_attr(atoms, "atoms") self.pairs = self._validate_attr(pairs, "pairs") self.pairs_nb = self._validate_attr(pairs_nb, "pairs_nb") self.bonds = self._validate_attr(bonds, "bonds") self.angles = self._validate_attr(angles, "angles") self.dihedrals = self._validate_attr(dihedrals, "dihedrals") self.system = self._validate_attr(system, "system") self.molecules = self._validate_attr(molecules, "molecules") self._concatenate = _TOPConcat(self, "concatenate") @classmethod def _from_dict(cls, dct: dict[_DFNames | _DFDictNames, Any]) -> Self: """Helper function for :meth:`TOPContainer.__reduce__`.""" return cls(**dct) @staticmethod def _validate_df( attr: pd.DataFrame, dtype: np.dtype[np.void], name: _DirectiveNames, func: int | None = None, ) -> None: assert dtype.names is not None expected = set(dtype.names) observed = set(attr.columns) if expected != observed: missing = sorted(expected - observed) extra = sorted(observed - expected) name_repr = repr(name) if func is None else f"{name!r} (func {func})" raise ValueError( f"Key mismatch in argument {name_repr}:\n" f" missing keys: {missing!r}\n" f" extra keys: {extra!r}" ) @classmethod def _validate_attr(cls, attr: None | _DFType, name: _DirectiveNames) -> _DFType: """Perform some basic validation on the passed dataframe.""" if (dtype := cls.DF_DTYPES.get(name)) is not None: if attr is None: return pd.DataFrame.from_records(np.empty((0,), dtype=dtype)) elif not isinstance(attr, pd.DataFrame): raise TypeError( f"Argument {name!r} expects None or a dataframe; " f"observed type: {type(attr).__module__}.{type(attr).__name__}" ) else: cls._validate_df(attr, dtype, name) elif (dtype_dict := cls.DF_DICT_DTYPES.get(name)) is not None: if attr is None: return {} elif ( not isinstance(attr, dict) or not all(isinstance(v, pd.DataFrame) for v in attr.values()) ): raise TypeError( f"Argument {name!r} expects None or a a dictionary of dataframes; " f"observed type: {type(attr).__module__}.{type(attr).__name__}") else: for func, df in attr.items(): try: dtype = dtype_dict[func] except KeyError: raise ValueError(f"Invalid {name!r} func type: {func!r}") from None cls._validate_df(df, dtype, name, func) else: raise ValueError(f"Unknown directive {name!r}") return attr def __reduce__(self) -> tuple[Callable[..., Self], tuple[Any, ...]]: """Helper function for :mod:`pickle`.""" cls = type(self) return cls._from_dict, ({ "defaults": self.defaults, "atomtypes": self.atomtypes, "bondtypes": self.bondtypes, "pairtypes": self.pairtypes, "angletypes": self.angletypes, "dihedraltypes": self.dihedraltypes, "constrainttypes": self.constrainttypes, "nonbond_params": self.nonbond_params, "moleculetype": self.moleculetype, "atoms": self.atoms, "pairs": self.pairs, "pairs_nb": self.pairs_nb, "bonds": self.bonds, "angles": self.angles, "dihedrals": self.dihedrals, "system": self.system, "molecules": self.molecules, },) def __repr__(self) -> str: """Implement :func:`repr(self)<repr>`.""" # Get all to-be printed attribute (names) cls = type(self) attr_names = [ "defaults", "atomtypes", "pairtypes", "bondtypes", "angletypes", "dihedraltypes", "constrainttypes", "nonbond_params", "moleculetype", "atoms", "pairs", "pairs_nb", "bonds", "angles", "dihedrals", "system", "molecules", ] # Determine the indentation width width = max(len(k) for k in attr_names) indent = width + 3 # Gather string representations of all attributes ret = "" with pd.option_context("display.max_rows", 20): items = ((k, getattr(self, k)) for k in attr_names) for k, _v in items: v = textwrap.indent( pprint.pformat(_v, sort_dicts=False, width=100), " " * indent, )[indent:] ret += f"{k:{width}} = {v},\n" return f"{cls.__name__}(\n{textwrap.indent(ret[:-2], 4 * ' ')}\n)" def __eq__(self, other: object) -> bool: """Implement :meth:`self == other<object.__eq__>`.""" cls = type(self) if not isinstance(other, cls): return NotImplemented return self._compare(other, lambda i, j: i.equals(j))
[docs] def allclose( self, other: TOPContainer, *, rtol: float = 1e-05, atol: float = 1e-08, equal_nan: bool = True, ) -> bool: """Return whether two TOPContainers are equivalent within a given tolerance. Parameters ---------- other: TOPContainer The to-be compared TOPContainer rtol: float The relative tolerance parameter (see Notes). atol: float The absolute tolerance parameter (see Notes). equal_nan: bool Whether to compare NaN's as equal. If True, NaN's in a will be considered equal to NaN's in b in the output array. Returns ------- bool Whether the two containers are equivalent within a given tolerance. See Also -------- :func:`numpy.allclose` Returns True if two arrays are element-wise equal within a tolerance. """ cls = type(self) if not isinstance(other, cls): raise TypeError(f"Expected a TOPContainer; observed type: {type(other).__name__!r}") return self._compare(other, lambda i, j: _df_allclose(i, j, rtol, atol, equal_nan))
def _compare( self, other: TOPContainer, callback: Callable[[pd.Series, pd.Series], bool], ) -> bool: for name1 in self.DF_DTYPES: df1: pd.DataFrame = getattr(self, name1) df2: pd.DataFrame = getattr(other, name1) if not callback(df1, df2): return False for name2 in self.DF_DICT_DTYPES: df_dict1: dict[int, pd.DataFrame] = getattr(self, name2) df_dict2: dict[int, pd.DataFrame] = getattr(other, name2) if df_dict1.keys() != df_dict2.keys(): return False elif not all(callback(v, df_dict2[k]) for k, v in df_dict1.items()): return False return True
[docs] @classmethod def from_file(cls, path: str | os.PathLike[str]) -> Self: """Construct a new :class:`TOPContainer` from the passed file path. Parameters ---------- path : path-like object The path to the .top file Returns ------- FOX.TOPContainer A newly constructed .top container """ df_kwargs: _DFKwargs = {} df_dict_kwargs: _DFDictKwargs = defaultdict(dict) requires_mol = {"atoms", "pairs", "pairs_nb", "bonds", "angles", "dihedrals"} mol: str | None = None with open(path, "r", encoding="utf8") as _f: f = FileIter(_f, stripper=lambda i: i.partition(";")[0].strip()) directive: None | _DirectiveNames = None for i in f: while not (i.startswith("[") and i.endswith("]")): next(f) else: directive = cast("_DirectiveNames", i.strip("[] ")) break while directive is not None: old_directive = directive if old_directive in requires_mol: directive, func, df, mol_new = cls._parse_directive(f, old_directive, mol) else: directive, func, df, mol_new = cls._parse_directive(f, old_directive) if mol_new is not None: mol = mol_new if df is None: continue if old_directive in requires_mol or old_directive == "moleculetype": if old_directive in df_kwargs: df_kwargs[old_directive] = pd.concat( [df_kwargs[old_directive], df], copy=False, ) else: df_kwargs[old_directive] = df elif old_directive in cls.DF_DTYPES: df_kwargs[old_directive] = df elif old_directive in cls.DF_DICT_DTYPES: df_dict_kwargs[old_directive][func] = df return cls(**df_kwargs, **df_dict_kwargs)
@staticmethod def _get_func_index(dtype_dict: _DtypeMap[int]) -> int: dtype = dtype_dict[1] assert dtype.names is not None return dtype.names.index("func") @staticmethod def _get_err_msg( start: int | None, end: int | None, directive: _DirectiveNames, dtype: np.dtype[np.void], lst: list[tuple[str, ...]], ) -> str: if start is None or end is None: return f'failed to parse one or more entries in the "[ {directive} ]"' for i, tup in enumerate(lst, start=1): try: np.array(tup, dtype=dtype) except Exception: return ( f'failed to parse "[ {directive} ]" directive entry {i} ' f"between lines {start + 1} and {end - 1}" ) return ( f'failed to parse the "[ {directive} ]" directive between ' f'lines {start + 1} and {end - 1}' ) @classmethod def _parse_directive( cls, f: FileIter, directive: _DirectiveNames, prefix: None | str = None, ) -> tuple[None | _DirectiveNames, int, None | pd.DataFrame, None | str]: # Get all data within the current directive lst: list[tuple[str, ...]] = [] i_start = f.index mol_new: str | None = None try: while not ((item := next(f)).startswith("[") and item.endswith("]")): lst.append(tuple(item.split()) if prefix is None else (prefix, *item.split())) except StopIteration: new_directive = None else: new_directive = cast("_DirectiveNames", item.strip("[] ")) # Get the data type of the current directive func = 1 if (dtype := cls.DF_DTYPES.get(directive)) is not None: pass elif (dtype_dict := cls.DF_DICT_DTYPES.get(directive)) is not None: if len(lst) == 0: return (new_directive, func, None, mol_new) func = int(lst[0][cls._get_func_index(dtype_dict)]) dtype = dtype_dict[func] else: warnings.warn( f'Invalid or unsuported directive: "[ {directive} ]"', TOPDirectiveWarning, stacklevel=3, ) return (new_directive, func, None, mol_new) # Convert the data into a dataframe try: rec_array = np.array(lst, dtype=dtype) except Exception as ex: msg = cls._get_err_msg(i_start, f.index, directive, dtype, lst) raise ValueError(f"{f.name!r}: {msg}") from ex df = pd.DataFrame(rec_array) if directive == "moleculetype": assert len(df) == 1 mol_new = df.at[0, "molecule"] return (new_directive, func, df, mol_new)
[docs] def to_file(self, path: str | os.PathLike[str]) -> None: """Construct a new .top file from this instance. Parameters ---------- path : path-like object The path of the to-be created .top file """ kwargs = {"index": False, "col_space": 5} with open(path, "w", encoding="utf8") as f: # Parameter level f.write("[ defaults ]\n") if self.defaults.size: f.write(";") self.defaults.to_string(f, **kwargs) f.write("\n") f.write("\n[ atomtypes ]\n") if self.atomtypes.size: f.write(";") self.atomtypes.to_string(f, **kwargs) f.write("\n") for dir1 in self.DF_DICT_DTYPES: df_dict: Mapping[str, pd.DataFrame] = getattr(self, dir1) for df in df_dict.values(): if df.size: f.write(f"\n[ {dir1} ]\n") f.write(";") df.to_string(f, **kwargs) f.write("\n") # Molecule level mol_directives = [ "moleculetype", "atoms", "pairs", "pairs_nb", "bonds", "angles", "dihedrals" ] for name in self.moleculetype["molecule"]: for dir2 in mol_directives: if (df := getattr(self, dir2)) is None: continue if dir2 == "moleculetype": columns: slice | list[str] = slice(None) else: columns = [k for k in df if k != "molecule"] df = df.loc[df["molecule"] == name, columns] if df.size: f.write(f"\n[ {dir2} ]\n") f.write(";") df.to_string(f, **kwargs) f.write("\n") # System level f.write("\n[ system ]\n") if self.system.size: f.write(";") self.system.to_string(f, **kwargs) f.write("\n") f.write("\n[ molecules ]\n") if self.molecules.size: f.write(";") self.molecules.to_string(f, **kwargs) f.write("\n")
def _to_hdf5_dict(self) -> dict[str, NDArray[np.void]]: dtype_dct: dict[str, NDArray[np.void]] = {} for name1, _dtype in self.DF_DTYPES.items(): df: pd.DataFrame = getattr(self, name1, None) assert _dtype.fields is not None # Construct a h5py-compatible structured dtype dtype_list = [] for sub_field, (sub_dtype, *_) in _dtype.fields.items(): if sub_dtype.kind == "U": sub_dtype = h5py.string_dtype("utf-8", sub_dtype.itemsize // 4) elif sub_dtype.kind == "O": sub_dtype = h5py.string_dtype("utf-8") dtype_list.append((sub_field, sub_dtype)) dtype = np.dtype(dtype_list) dtype_dct[name1] = df.to_records(index=False).astype(dtype) iterator = ( (k, i, _dtype) for k, dct in self.DF_DICT_DTYPES.items() for i, _dtype in dct.items() ) for (name2, func, _dtype) in iterator: df = (pre_df.get(func) if (pre_df := getattr(self, name2)) is not None else None) if df is None: continue assert _dtype.fields is not None # Construct a h5py-compatible structured dtype dtype_list = [] for sub_field, (sub_dtype, *_) in _dtype.fields.items(): if sub_dtype.kind == "U": sub_dtype = h5py.string_dtype("utf-8", sub_dtype.itemsize // 4) elif sub_dtype.kind == "O": sub_dtype = h5py.string_dtype("utf-8") dtype_list.append((sub_field, sub_dtype)) dtype = np.dtype(dtype_list) dtype_dct[f"{name2}/{func}"] = df.to_records(index=False).astype(dtype) return dtype_dct
[docs] def generate_pairs(self, func: Literal[1, 2] = 1) -> None: """Construct and populate the ``pairs`` directive with explicit 1,4-pairs based on \ the available bonds. Parameters ---------- func: {1, 2} The func type as used for the new pairs. """ pair_dfs: list[pd.DataFrame] = [] for _, (mol, n_rexcl) in self.moleculetype.iterrows(): atom_count = len(self.atoms.loc[self.atoms["molecule"] == mol, :]) bonds = self.bonds.loc[self.bonds["molecule"] == mol, ["atom1", "atom2"]] - 1 if self.bonds.size == 0: continue depth_mat = np.triu(degree_of_separation( atom_count * [None], bond_mat=(np.ones(len(bonds), dtype=np.bool_), (bonds["atom1"], bonds["atom2"])) )) pairs_14 = np.asarray( np.where(depth_mat == n_rexcl), dtype=self.DF_DTYPES["pairs"]["atom1"], ) pairs_14 += 1 pair_dfs.append(pd.DataFrame({ "molecule": mol, "atom1": pairs_14[0], "atom2": pairs_14[1], "func": func, })) if len(pair_dfs) == 0: return keys = ["molecule", "atom1", "atom2"] pairs_new = pd.concat(pair_dfs, ignore_index=True) self.pairs = pairs_new[~pairs_new.duplicated(keys)].sort_values(keys, ignore_index=True)
[docs] def generate_pairs_nb(self, func: Literal[1] = 1) -> None: """Construct and populate the ``pairs_nb`` directive with explicit nonbonded pairs based \ on the available non-bonded atoms. Parameters ---------- func: {1} The func type as used for the new pairs. """ pair_dfs: list[pd.DataFrame] = [] for mol in self.molecules["molecule"]: atom_count = len(self.atoms.loc[self.atoms["molecule"] == mol, :]) bonds = self.bonds.loc[self.bonds["molecule"] == mol, ["atom1", "atom2"]] - 1 if self.bonds.size == 0: continue depth_mat = np.triu(degree_of_separation( atom_count * [None], bond_mat=(np.ones(len(bonds), dtype=np.bool_), (bonds["atom1"], bonds["atom2"])) )) pairs = np.asarray( np.where(np.isinf(depth_mat)), dtype=self.DF_DTYPES["pairs_nb"]["atom1"], ) pairs += 1 pair_dfs.append(pd.DataFrame({ "molecule": mol, "atom1": pairs[0], "atom2": pairs[1], "func": func, })) if len(pair_dfs) == 0: return keys = ["molecule", "atom1", "atom2"] pairs_new = pd.concat(pair_dfs, ignore_index=True) self.pairs_nb = pairs_new[~pairs_new.duplicated(keys)].sort_values(keys, ignore_index=True)
[docs] def copy(self, deep: bool = True) -> Self: """Return a copy of this instance. Parameters ---------- deep: bool Whether a deep copy should be created or not Returns ------- A copy of this instance """ return copy.deepcopy(self) if deep else copy.copy(self)