"""
Residue-related classes --- :mod:`prolif.residue`
=================================================
"""
import re
from collections import UserDict
from typing import List, Optional
import numpy as np
from rdkit.Chem.rdmolops import FastFindRings
from prolif.rdkitmol import BaseRDKitMol
# Pattern used by ``ResidueId.from_string`` to split a residue string such as
# "ALA10.A" into its parts. Every part is optional:
#   group 1: residue name -- the literal "TIP3", or 2-3 uppercase letters
#            optionally preceded by one uppercase letter or digit
#   group 2: residue number (zero or more digits)
#   (an optional "." separator, not captured)
#   group 3: chain identifier (a single word character)
_RE_RESID = re.compile(r"(TIP3|[A-Z0-9]?[A-Z]{2,3})?(\d*)\.?(\w)?")
class ResidueId:
    """A unique residue identifier

    Parameters
    ----------
    name : str
        3-letter residue name. Falsy values (``None``, ``""``) are replaced
        by ``"UNK"``
    number : int
        residue number. Falsy values (``None``, ``0``) are stored as ``0``
    chain : str or None, optional
        1-letter protein chain. Falsy values (``""``) are stored as ``None``

    Notes
    -----
    Two identifiers are equal when their name, number and chain are equal.
    Identifiers are ordered by chain first, then by residue number; an
    identifier without a chain sorts before any identifier that has one.
    """

    def __init__(self, name: str = "UNK", number: int = 0, chain: Optional[str] = None):
        # normalise "missing" values so that equivalent identifiers hash equal
        self.name = name or "UNK"
        self.number = number or 0
        self.chain = chain or None

    def __repr__(self):
        return f"ResidueId({self.name}, {self.number}, {self.chain})"

    def __str__(self):
        resid = f"{self.name}{self.number}"
        if self.chain:
            resid += f".{self.chain}"
        return resid

    def __hash__(self):
        return hash((self.name, self.number, self.chain))

    def __eq__(self, other):
        # Compare the actual fields rather than the hashes: hash equality does
        # not imply equality, and hashing fails on unhashable operands.
        if not isinstance(other, ResidueId):
            return NotImplemented
        return (self.name, self.number, self.chain) == (
            other.name,
            other.number,
            other.chain,
        )

    def __lt__(self, other):
        # ``None`` cannot be ordered against ``str``: treat a missing chain as
        # an empty string so that mixed collections can still be sorted.
        return (self.chain or "", self.number) < (other.chain or "", other.number)

    @classmethod
    def from_atom(cls, atom):
        """Creates a ResidueId from an RDKit atom

        Parameters
        ----------
        atom : rdkit.Chem.rdchem.Atom
            An atom that contains an RDKit :class:`~rdkit.Chem.rdchem.AtomMonomerInfo`

        Returns
        -------
        resid : ResidueId
            The identifier built from the residue name, residue number and
            chain of the monomer info, or a default ``ResidueId()`` if the
            atom has no monomer info
        """
        mi = atom.GetMonomerInfo()
        if mi:
            name = mi.GetResidueName()
            number = mi.GetResidueNumber()
            chain = mi.GetChainId()
            return cls(name, number, chain)
        return cls()

    @classmethod
    def from_string(cls, resid_str):
        """Creates a ResidueId from a string

        Parameters
        ----------
        resid_str : str
            A string in the format ``<3-letter code><residue number>.<chain>``
            All arguments are optional, and the dot should be present only if
            the chain identifier is also present

        Examples
        --------

        +-----------+----------------------------------+
        | string    | Corresponding ResidueId          |
        +===========+==================================+
        | "ALA10.A" | ``ResidueId("ALA", 10, "A")``    |
        +-----------+----------------------------------+
        | "GLU33"   | ``ResidueId("GLU", 33, None)``   |
        +-----------+----------------------------------+
        | "LYS.B"   | ``ResidueId("LYS", 0, "B")``     |
        +-----------+----------------------------------+
        | "ARG"     | ``ResidueId("ARG", 0, None)``    |
        +-----------+----------------------------------+
        | "5.C"     | ``ResidueId("UNK", 5, "C")``     |
        +-----------+----------------------------------+
        | "42"      | ``ResidueId("UNK", 42, None)``   |
        +-----------+----------------------------------+
        | ".D"      | ``ResidueId("UNK", 0, "D")``     |
        +-----------+----------------------------------+
        | ""        | ``ResidueId("UNK", 0, None)``    |
        +-----------+----------------------------------+

        """
        # every group of the pattern is optional, so a match is always found
        matches = _RE_RESID.search(resid_str)
        name, number, chain = matches.groups()
        number = int(number) if number else 0
        return cls(name, number, chain)
class Residue(BaseRDKitMol):
    """A class for residues as RDKit molecules

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        The residue as an RDKit molecule

    Attributes
    ----------
    resid : prolif.residue.ResidueId
        The residue identifier

    Notes
    -----
    The name of the residue can be converted to a string by using
    ``str(Residue)``
    """

    def __init__(self, mol):
        super().__init__(mol)
        # run RDKit's FastFindRings on the freshly built molecule
        FastFindRings(self)
        # the residue identifier is read from the first atom of the molecule
        self.resid = ResidueId.from_atom(self.GetAtomWithIdx(0))

    def __repr__(self):  # pragma: no cover
        name = ".".join([self.__class__.__module__, self.__class__.__name__])
        return f"<{name} {self.resid} at {id(self):#x}>"

    def __str__(self):
        return str(self.resid)
class ResidueGroup(UserDict):
    """A container to store and retrieve Residue instances easily

    Parameters
    ----------
    residues : list
        A list of :class:`~prolif.residue.Residue`

    Attributes
    ----------
    n_residues : int
        Number of residues in the ResidueGroup
    name : numpy.ndarray
        Residue names, one entry per residue (``dtype=object``)
    number : numpy.ndarray
        Residue numbers, one entry per residue (``dtype=numpy.int64``)
    chain : numpy.ndarray
        Chain identifiers, one entry per residue (``dtype=object``)

    Notes
    -----
    Residues in the group can be accessed by :class:`ResidueId`, string, or
    index. See the :class:`~prolif.molecule.Molecule` class for an example.
    You can also use the :meth:`~prolif.residue.ResidueGroup.select` method to
    access a subset of a ResidueGroup.
    """

    def __init__(self, residues: List["Residue"]):
        self._residues = np.asarray(residues, dtype=object)
        resinfo = [
            (r.resid.name, r.resid.number, r.resid.chain) for r in self._residues
        ]
        if resinfo:
            # transpose the per-residue tuples into per-attribute columns
            name, number, chain = zip(*resinfo)
        else:
            name = number = chain = ()
        self.name = np.asarray(name, dtype=object)
        # Signed 64-bit integers: residue numbers can be negative or larger
        # than 65535, neither of which fits in an unsigned 16-bit integer.
        # The same dtype is used whether or not the group is empty.
        self.number = np.asarray(number, dtype=np.int64)
        self.chain = np.asarray(chain, dtype=object)
        super().__init__([(r.resid, r) for r in self._residues])

    def __getitem__(self, key):
        """Retrieve a residue by position, string, or ResidueId

        Parameters
        ----------
        key : int, str or ResidueId
            An integer (Python or NumPy) indexes the residues in the order
            they were given; a string is first converted with
            :meth:`ResidueId.from_string`; a ResidueId is looked up directly

        Raises
        ------
        KeyError
            If ``key`` is a ``bool`` or any other unsupported type, or if the
            ResidueId is not part of the group
        IndexError
            If an integer ``key`` is out of range
        """
        type_error_msg = (
            f"Expected a ResidueId, int, or str, got {type(key).__name__!r} instead"
        )
        # bool is a subclass of int but shouldn't be used here
        if isinstance(key, bool):
            raise KeyError(type_error_msg)
        if isinstance(key, (int, np.integer)):
            return self._residues[key]
        if isinstance(key, str):
            key = ResidueId.from_string(key)
        if isinstance(key, ResidueId):
            return self.data[key]
        raise KeyError(type_error_msg)

    def select(self, mask):
        """Locate a subset of a ResidueGroup based on a boolean mask

        Parameters
        ----------
        mask : numpy.ndarray
            A 1D array of ``dtype=bool`` with the same length as the number of
            residues in the ResidueGroup. The mask should be constructed by
            using conditions on the "name", "number", and "chain" residue
            attributes as defined in the :class:`~prolif.residue.ResidueId`
            class

        Returns
        -------
        rg : prolif.residue.ResidueGroup
            A subset of the original ResidueGroup

        Examples
        --------
        ::

            >>> rg
            <prolif.residue.ResidueGroup with 200 residues at 0x7f9a68719ac0>
            >>> rg.select(rg.chain == "A")
            <prolif.residue.ResidueGroup with 42 residues at 0x7fe3fdb86ca0>
            >>> rg.select((10 <= rg.number) & (rg.number < 30))
            <prolif.residue.ResidueGroup with 20 residues at 0x7f5f3c69aaf0>
            >>> rg.select((rg.chain == "B") & (np.isin(rg.name, ["ASP", "GLU"])))
            <prolif.residue.ResidueGroup with 3 residues at 0x7f5f3c510c70>

        As seen in these examples, you can combine masks with different
        operators, similarly to numpy boolean indexing or pandas
        :meth:`~pandas.DataFrame.loc` method

        * AND --> ``&``
        * OR --> ``|``
        * XOR --> ``^``
        * NOT --> ``~``
        """
        return ResidueGroup(self._residues[mask])

    def __repr__(self):  # pragma: no cover
        name = ".".join([self.__class__.__module__, self.__class__.__name__])
        return f"<{name} with {self.n_residues} residues at {id(self):#x}>"

    @property
    def n_residues(self):
        """Number of residues in the ResidueGroup"""
        return len(self)