Source code for prolif.residue

"""
Residue-related classes --- :mod:`prolif.residue`
=================================================
"""

import re
from collections import UserDict
from typing import List, Optional

import numpy as np
from rdkit.Chem.rdmolops import FastFindRings

from prolif.rdkitmol import BaseRDKitMol

# Pattern used by ``ResidueId.from_string`` to split a residue string such as
# "ALA10.A" into its parts. Every part is optional, so the pattern also matches
# the empty string:
#   group 1: residue name -- either the literal "TIP3", or 2-3 uppercase
#            letters optionally preceded by one uppercase letter or digit
#   group 2: residue number, a (possibly empty) run of digits
#   then an optional literal "." separator
#   group 3: a single word character used as the chain identifier
_RE_RESID = re.compile(r"(TIP3|[A-Z0-9]?[A-Z]{2,3})?(\d*)\.?(\w)?")


class ResidueId:
    """A unique residue identifier

    Parameters
    ----------
    name : str
        3-letter residue name
    number : int
        residue number
    chain : str or None, optional
        1-letter protein chain

    Notes
    -----
    Falsy arguments are replaced by the defaults: an empty or ``None`` name
    becomes ``"UNK"``, a missing number becomes ``0`` and an empty chain
    becomes ``None``.

    Two identifiers are equal when their name, number and chain are all equal.
    Ordering only considers ``(chain, number)``; a missing chain sorts before
    any named chain.
    """

    def __init__(self, name: str = "UNK", number: int = 0, chain: Optional[str] = None):
        self.name = name or "UNK"
        self.number = number or 0
        self.chain = chain or None

    def __repr__(self):
        return f"ResidueId({self.name}, {self.number}, {self.chain})"

    def __str__(self):
        resid = f"{self.name}{self.number}"
        if self.chain:
            resid += f".{self.chain}"
        return resid

    def __hash__(self):
        return hash((self.name, self.number, self.chain))

    def __eq__(self, other):
        # Compare the fields themselves rather than the hashes: hashing
        # ``other`` raises for unhashable objects (lists, dicts, ...) and a
        # hash collision must not be reported as equality.
        if not isinstance(other, ResidueId):
            return NotImplemented
        return (self.name, self.number, self.chain) == (
            other.name,
            other.number,
            other.chain,
        )

    def __lt__(self, other):
        # ``None`` and ``str`` cannot be ordered against each other, so a
        # missing chain is treated as the empty string (``__init__`` never
        # stores an empty string, so this mapping is unambiguous).
        return (self.chain or "", self.number) < (other.chain or "", other.number)

    @classmethod
    def from_atom(cls, atom):
        """Creates a ResidueId from an RDKit atom

        Parameters
        ----------
        atom : rdkit.Chem.rdchem.Atom
            An atom that contains an RDKit
            :class:`~rdkit.Chem.rdchem.AtomMonomerInfo`

        Returns
        -------
        resid : ResidueId
            Built from the residue name, residue number and chain id of the
            atom's monomer info, or a default ``ResidueId()`` if the atom has
            no monomer info
        """
        mi = atom.GetMonomerInfo()
        if mi:
            name = mi.GetResidueName()
            number = mi.GetResidueNumber()
            chain = mi.GetChainId()
            return cls(name, number, chain)
        return cls()

    @classmethod
    def from_string(cls, resid_str):
        """Creates a ResidueId from a string

        Parameters
        ----------
        resid_str : str
            A string in the format ``<3-letter code><residue number>.<chain>``
            All arguments are optional, and the dot should be present only if
            the chain identifier is also present

        Examples
        --------

        +-----------+----------------------------------+
        | string    | Corresponding ResidueId          |
        +===========+==================================+
        | "ALA10.A" | ``ResidueId("ALA", 10, "A")``    |
        +-----------+----------------------------------+
        | "GLU33"   | ``ResidueId("GLU", 33, None)``   |
        +-----------+----------------------------------+
        | "LYS.B"   | ``ResidueId("LYS", 0, "B")``     |
        +-----------+----------------------------------+
        | "ARG"     | ``ResidueId("ARG", 0, None)``    |
        +-----------+----------------------------------+
        | "5.C"     | ``ResidueId("UNK", 5, "C")``     |
        +-----------+----------------------------------+
        | "42"      | ``ResidueId("UNK", 42, None)``   |
        +-----------+----------------------------------+
        | ".D"      | ``ResidueId("UNK", 0, "D")``     |
        +-----------+----------------------------------+
        | ""        | ``ResidueId("UNK", 0, None)``    |
        +-----------+----------------------------------+

        """
        # every part of the pattern is optional, so the search always matches
        matches = _RE_RESID.search(resid_str)
        name, number, chain = matches.groups()
        number = int(number) if number else 0
        return cls(name, number, chain)
class Residue(BaseRDKitMol):
    """A single residue wrapped as an RDKit molecule

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        The residue as an RDKit molecule

    Attributes
    ----------
    resid : prolif.residue.ResidueId
        The residue identifier, read from the first atom of the molecule

    Notes
    -----
    ``str(Residue)`` returns the string form of the residue identifier
    """

    def __init__(self, mol):
        super().__init__(mol)
        # run RDKit's ring perception on the freshly built molecule
        FastFindRings(self)
        # the atom at index 0 provides the identifier for the whole residue
        first_atom = self.GetAtomWithIdx(0)
        self.resid = ResidueId.from_atom(first_atom)

    def __repr__(self):  # pragma: no cover
        cls = self.__class__
        return f"<{cls.__module__}.{cls.__name__} {self.resid} at {id(self):#x}>"

    def __str__(self):
        return str(self.resid)
class ResidueGroup(UserDict):
    """A container to store and retrieve Residue instances easily

    Parameters
    ----------
    residues : list
        A list of :class:`~prolif.residue.Residue`

    Attributes
    ----------
    name : numpy.ndarray
        Residue names, one per residue (``dtype=object``)
    number : numpy.ndarray
        Residue numbers, one per residue. Stored as signed 64-bit integers so
        that negative residue numbers and numbers above 65535 are kept as-is
    chain : numpy.ndarray
        Chain identifiers (``str`` or ``None``), one per residue
        (``dtype=object``)
    n_residues : int
        Number of residues in the ResidueGroup

    Notes
    -----
    Residues in the group can be accessed by :class:`ResidueId`, string, or
    index. See the :class:`~prolif.molecule.Molecule` class for an example.
    You can also use the :meth:`~prolif.residue.ResidueGroup.select` method to
    access a subset of a ResidueGroup.
    """

    def __init__(self, residues: List["Residue"]):
        self._residues = np.asarray(residues, dtype=object)
        resids = [r.resid for r in self._residues]
        # Building each array from its own list also covers the empty group
        # (arrays of length 0) without a special case.
        self.name = np.array([rid.name for rid in resids], dtype=object)
        self.number = np.array([rid.number for rid in resids], dtype=np.int64)
        self.chain = np.array([rid.chain for rid in resids], dtype=object)
        super().__init__(list(zip(resids, self._residues)))

    def __getitem__(self, key):
        """Retrieve a residue by position, residue string, or ResidueId

        Parameters
        ----------
        key : int, str or ResidueId
            An integer (Python or NumPy) indexes the residues in the order
            they were given, a string is parsed with
            :meth:`ResidueId.from_string`, and a :class:`ResidueId` is looked
            up directly

        Raises
        ------
        KeyError
            If ``key`` has an unsupported type (booleans included), or if no
            residue matches the given string or :class:`ResidueId`
        IndexError
            If an integer ``key`` is out of bounds
        """
        # bool is a subclass of int but shouldn't be used here
        if isinstance(key, (bool, np.bool_)):
            raise KeyError(
                f"Expected a ResidueId, int, or str, got {type(key).__name__!r} instead"
            )
        # NumPy integer scalars (e.g. from argmax/flatnonzero) are not ``int``
        # instances but are just as valid as positional indices
        if isinstance(key, (int, np.integer)):
            return self._residues[key]
        if isinstance(key, str):
            return self.data[ResidueId.from_string(key)]
        if isinstance(key, ResidueId):
            return self.data[key]
        raise KeyError(
            f"Expected a ResidueId, int, or str, got {type(key).__name__!r} instead"
        )

    def select(self, mask):
        """Locate a subset of a ResidueGroup based on a boolean mask

        Parameters
        ----------
        mask : numpy.ndarray
            A 1D array of ``dtype=bool`` with the same length as the number of
            residues in the ResidueGroup. The mask should be constructed by
            using conditions on the "name", "number", and "chain" residue
            attributes as defined in the :class:`~prolif.residue.ResidueId`
            class

        Returns
        -------
        rg : prolif.residue.ResidueGroup
            A subset of the original ResidueGroup

        Examples
        --------
        ::

            >>> rg
            <prolif.residue.ResidueGroup with 200 residues at 0x7f9a68719ac0>
            >>> rg.select(rg.chain == "A")
            <prolif.residue.ResidueGroup with 42 residues at 0x7fe3fdb86ca0>
            >>> rg.select((10 <= rg.number) & (rg.number < 30))
            <prolif.residue.ResidueGroup with 20 residues at 0x7f5f3c69aaf0>
            >>> rg.select((rg.chain == "B") & (np.isin(rg.name, ["ASP", "GLU"])))
            <prolif.residue.ResidueGroup with 3 residues at 0x7f5f3c510c70>

        As seen in these examples, you can combine masks with different
        operators, similarly to numpy boolean indexing or pandas
        :meth:`~pandas.DataFrame.loc` method

        * AND --> ``&``
        * OR --> ``|``
        * XOR --> ``^``
        * NOT --> ``~``
        """
        return ResidueGroup(self._residues[mask])

    def __repr__(self):  # pragma: no cover
        name = ".".join([self.__class__.__module__, self.__class__.__name__])
        return f"<{name} with {self.n_residues} residues at {id(self):#x}>"

    @property
    def n_residues(self):
        """Number of residues in the group"""
        return len(self)