"""
Calculate a Protein-Ligand Interaction Fingerprint --- :mod:`prolif.fingerprint`
================================================================================
.. ipython:: python
:okwarning:
import MDAnalysis as mda
from rdkit.DataStructs import TanimotoSimilarity
import prolif
u = mda.Universe(prolif.datafiles.TOP, prolif.datafiles.TRAJ)
prot = u.select_atoms("protein")
lig = u.select_atoms("resname LIG")
fp = prolif.Fingerprint(["HBDonor", "HBAcceptor", "PiStacking", "CationPi",
"Cationic"])
fp.run(u.trajectory[::10], lig, prot)
df = fp.to_dataframe()
df
bv = fp.to_bitvectors()
TanimotoSimilarity(bv[0], bv[1])
"""
from functools import wraps
import numpy as np
from tqdm.auto import tqdm
from .interactions import _INTERACTIONS
from .molecule import Molecule
from .utils import get_residues_near_ligand, to_dataframe, to_bitvectors
def _return_first_element(f):
"""Modifies the return signature of a function by forcing it to return
only the first element if multiple values were returned.
Notes
-----
The original return signature of the decorated function is still accessible
by calling ``function.__wrapped__(*args, **kwargs)``.
Example
-------
::
>>> def foo():
... return 1, 2, 3
...
>>> bar = _return_first_element(foo)
>>> foo()
(1, 2, 3)
>>> bar()
1
>>> bar.__wrapped__()
(1, 2, 3)
"""
@wraps(f)
def wrapper(*args, **kwargs):
results = f(*args, **kwargs)
try:
value, *rest = results
except TypeError:
value = results
return value
return wrapper
[docs]class Fingerprint:
"""Class that generates an interaction fingerprint between two molecules
While in most cases the fingerprint will be generated between a ligand and
a protein, it is also possible to use this class for protein-protein
interactions, or host-guest systems.
Parameters
----------
interactions : list
List of names (str) of interaction classes as found in the
:mod:`prolif.interactions` module
Attributes
----------
interactions : dict
Dictionnary of interaction functions indexed by class name. For more
details, see :mod:`prolif.interactions`
n_interactions : int
Number of interaction functions registered by the fingerprint
ifp : list, optionnal
List of interactions fingerprints for the given trajectory.
Notes
-----
You can use the fingerprint generator in multiple ways:
- On a trajectory directly from MDAnalysis objects:
.. ipython:: python
prot = u.select_atoms("protein")
lig = u.select_atoms("resname LIG")
fp = prolif.Fingerprint(["HBDonor", "HBAcceptor", "PiStacking",
"Hydrophobic"])
fp.run(u.trajectory[:5], lig, prot)
fp.to_dataframe()
- On a specific frame and a specific pair of residues:
.. ipython:: python
u.trajectory[0] # use the first frame
prot = prolif.Molecule.from_mda(prot)
lig = prolif.Molecule.from_mda(lig)
fp.bitvector(lig, prot["ASP129.A"])
- On a specific pair of residues for a specific interaction:
.. ipython:: python
# ligand-protein
fp.hbdonor(lig, prot["ASP129.A"])
# protein-protein (alpha helix)
fp.hbacceptor(prot["ASP129.A"], prot["CYS133.A"])
You can also obtain the indices of atoms responsible for the interaction:
.. ipython:: python
fp.bitvector_atoms(lig, prot["ASP129.A"])
fp.hbdonor.__wrapped__(lig, prot["ASP129.A"])
"""
def __init__(self, interactions=["Hydrophobic", "HBDonor", "HBAcceptor",
"PiStacking", "Anionic", "Cationic", "CationPi", "PiCation"]):
# read interactions to compute
self.interactions = {}
if interactions == "all":
interactions = [i for i in _INTERACTIONS.keys()
if not (i.startswith("_") or i == "Interaction")]
for name, interaction_cls in _INTERACTIONS.items():
if name.startswith("_") or name == "Interaction":
continue
func = interaction_cls().detect
func = _return_first_element(func)
setattr(self, name.lower(), func)
if name in interactions:
self.interactions[name] = func
def __repr__(self): # pragma: no cover
name = ".".join([self.__class__.__module__, self.__class__.__name__])
params = f"{self.n_interactions} interactions: {list(self.interactions.keys())}"
return f"<{name}: {params} at {id(self):#x}>"
[docs] @staticmethod
def list_available(show_hidden=False):
"""List interactions available to the Fingerprint class.
Parameters
----------
show_hidden : bool
Show hidden classes (usually base classes whose name starts with an
underscore ``_``. Those are not supposed to be called directly)
"""
if show_hidden:
interactions = [name for name in _INTERACTIONS.keys()]
else:
interactions = [name for name in _INTERACTIONS.keys()
if not (name.startswith("_")
or name == "Interaction")]
return sorted(interactions)
@property
def n_interactions(self):
return len(self.interactions)
[docs] def bitvector(self, res1, res2):
"""Generates the complete bitvector for the interactions between two
residues. To get the indices of atoms responsible for each interaction,
see :meth:`bitvector_atoms`
Parameters
----------
res1 : prolif.residue.Residue or prolif.molecule.Molecule
A residue, usually from a ligand
res2 : prolif.residue.Residue or prolif.molecule.Molecule
A residue, usually from a protein
Returns
-------
bitvector : numpy.ndarray
An array storing the encoded interactions between res1 and res2
"""
bitvector = []
for func in self.interactions.values():
bit = func(res1, res2)
bitvector.append(bit)
return np.array(bitvector)
[docs] def bitvector_atoms(self, res1, res2):
"""Generates the complete bitvector for the interactions between two
residues, and returns the indices of atoms responsible for these
interactions
Parameters
----------
res1 : prolif.residue.Residue or prolif.molecule.Molecule
A residue, usually from a ligand
res2 : prolif.residue.Residue or prolif.molecule.Molecule
A residue, usually from a protein
Returns
-------
bitvector : numpy.ndarray
An array storing the encoded interactions between res1 and res2
lig_atoms : list
A list containing indices for the ligand atoms responsible for each
interaction
pro_atoms : list
A list containing indices for the protein atoms responsible for
each interaction
.. versionchanged:: 0.3.2
Atom indices are returned as two separate lists instead of a single
list of tuples
"""
bitvector = []
lig_atoms = []
prot_atoms = []
for func in self.interactions.values():
bit, la, pa = func.__wrapped__(res1, res2)
bitvector.append(bit)
lig_atoms.append(la)
prot_atoms.append(pa)
bitvector = np.array(bitvector)
return bitvector, lig_atoms, prot_atoms
[docs] def generate(self, lig, prot, residues=None, return_atoms=False):
"""Generates the interaction fingerprint between 2 molecules
Parameters
----------
lig : prolif.molecule.Molecule
Molecule for the ligand
prot : prolif.molecule.Molecule
Molecule for the protein
residues : list or "all" or None
A list of protein residues (:class:`str`, :class:`int` or
:class:`~prolif.residue.ResidueId`) to take into account for
the fingerprint extraction. If ``"all"``, all residues will be
used. If ``None``, at each frame the
:func:`~prolif.utils.get_residues_near_ligand` function is used to
automatically use protein residues that are distant of 6.0 Å or
less from each ligand residue.
return_atoms : bool
For each residue pair and interaction, return indices of atoms
responsible for the interaction instead of bits.
Returns
-------
ifp : dict
A dictionnary indexed by ``(ligand, protein)`` residue pairs. The
format for values will depend on ``return_atoms``:
- A single bitvector if ``return_atoms=False``
- A tuple of bitvector, ligand atom indices and protein atom
indices if ``return_atoms=True``
Example
-------
::
>>> u = mda.Universe("complex.pdb")
>>> lig = plf.Molecule.from_mda(u, "resname LIG")
>>> prot = plf.Molecule.from_mda(u, "protein")
>>> fp = prolif.Fingerprint()
>>> ifp = fp.generate(lig, prot)
.. versionadded:: 0.3.2
"""
ifp = {}
resids = residues
if residues == "all":
resids = prot.residues.keys()
for lresid, lres in lig.residues.items():
if residues is None:
resids = get_residues_near_ligand(lres, prot)
for prot_key in resids:
pres = prot[prot_key]
key = (lresid, pres.resid)
if return_atoms:
ifp[key] = self.bitvector_atoms(lres, pres)
else:
ifp[key] = self.bitvector(lres, pres)
return ifp
[docs] def run(self, traj, lig, prot, residues=None, progress=True):
"""Generates the fingerprint on a trajectory for a ligand and a protein
Parameters
----------
traj : MDAnalysis.coordinates.base.ProtoReader or MDAnalysis.coordinates.base.FrameIteratorSliced
Iterate over this Universe trajectory or sliced trajectory object
to extract the frames used for the fingerprint extraction
lig : MDAnalysis.core.groups.AtomGroup
An MDAnalysis AtomGroup for the ligand
prot : MDAnalysis.core.groups.AtomGroup
An MDAnalysis AtomGroup for the protein (with multiple residues)
residues : list or "all" or None
A list of protein residues (:class:`str`, :class:`int` or
:class:`~prolif.residue.ResidueId`) to take into account for
the fingerprint extraction. If ``"all"``, all residues will be
used. If ``None``, at each frame the
:func:`~prolif.utils.get_residues_near_ligand` function is used to
automatically use protein residues that are distant of 6.0 Å or
less from each ligand residue.
progress : bool
Use the `tqdm <https://tqdm.github.io/>`_ package to display a
progressbar while running the calculation
Returns
-------
prolif.fingerprint.Fingerprint
The Fingerprint instance that generated the fingerprint
Example
-------
::
>>> u = mda.Universe("top.pdb", "traj.nc")
>>> lig = u.select_atoms("resname LIG")
>>> prot = u.select_atoms("protein")
>>> fp = prolif.Fingerprint().run(u.trajectory[:10], lig, prot)
.. versionchanged:: 0.3.2
Moved the ``return_atoms`` parameter from the ``run`` method to the
dataframe conversion code
"""
iterator = tqdm(traj) if progress else traj
if residues == "all":
residues = Molecule.from_mda(prot).residues.keys()
ifp = []
for ts in iterator:
prot_mol = Molecule.from_mda(prot)
lig_mol = Molecule.from_mda(lig)
data = self.generate(lig_mol, prot_mol, residues=residues,
return_atoms=True)
data["Frame"] = ts.frame
ifp.append(data)
self.ifp = ifp
return self
[docs] def run_from_iterable(self, lig_iterable, prot_mol, residues=None,
progress=True):
"""Generates the fingerprint between a list of ligands and a protein
Parameters
----------
lig_iterable : list or generator
An iterable yielding ligands as :class:`~prolif.molecule.Molecule`
objects
prot_mol : prolif.molecule.Molecule
The protein
residues : list or "all" or None
A list of protein residues (:class:`str`, :class:`int` or
:class:`~prolif.residue.ResidueId`) to take into account for
the fingerprint extraction. If ``"all"``, all residues will be
used. If ``None``, at each frame the
:func:`~prolif.utils.get_residues_near_ligand` function is used to
automatically use protein residues that are distant of 6.0 Å or
less from each ligand residue.
progress : bool
Use the `tqdm <https://tqdm.github.io/>`_ package to display a
progressbar while running the calculation
Returns
-------
prolif.fingerprint.Fingerprint
The Fingerprint instance that generated the fingerprint
Example
-------
::
>>> prot = mda.Universe("protein.pdb")
>>> prot = plf.Molecule.from_mda(prot)
>>> lig_iter = plf.mol2_supplier("docking_output.mol2")
>>> fp = plf.Fingerprint()
>>> fp.run_from_iterable(lig_iter, prot)
.. seealso::
:meth:`Fingerprint.generate` to generate the fingerprint between
two single structures
.. versionchanged:: 0.3.2
Moved the ``return_atoms`` parameter from the ``run_from_iterable``
method to the dataframe conversion code
"""
iterator = tqdm(lig_iterable) if progress else lig_iterable
if residues == "all":
residues = prot_mol.residues.keys()
ifp = []
for i, lig_mol in enumerate(iterator):
data = self.generate(lig_mol, prot_mol, residues=residues,
return_atoms=True)
data["Frame"] = i
ifp.append(data)
self.ifp = ifp
return self
[docs] def to_dataframe(self, **kwargs):
"""Converts fingerprints to a pandas DataFrame
Parameters
----------
dtype : object or None
Cast the input of each bit in the bitvector to this type. If None,
keep the data as is
drop_empty : bool
Drop columns with only empty values
return_atoms : bool
For each residue pair and interaction, return indices of atoms
responsible for the interaction instead of bits
Returns
-------
df : pandas.DataFrame
A DataFrame storing the results frame by frame and residue by
residue. See :meth:`~prolif.utils.to_dataframe` for more
information
Raises
------
AttributeError
If the :meth:`run` method hasn't been used
Example
-------
::
>>> df = fp.to_dataframe(dtype=np.uint8)
>>> print(df)
ligand LIG1.G
protein ILE59 ILE55 TYR93
interaction Hydrophobic HBAcceptor Hydrophobic Hydrophobic PiStacking
Frame
0 0 1 0 0 0
...
"""
if hasattr(self, "ifp"):
return to_dataframe(self.ifp, self.interactions.keys(), **kwargs)
raise AttributeError("Please use the `run` method before")
[docs] def to_bitvectors(self):
"""Converts fingerprints to a list of RDKit ExplicitBitVector
Returns
-------
bvs : list
A list of :class:`~rdkit.DataStructs.cDataStructs.ExplicitBitVect`
for each frame
Raises
------
AttributeError
If the :meth:`run` method hasn't been used
Example
-------
::
>>> from rdkit.DataStructs import TanimotoSimilarity
>>> bv = fp.to_bitvectors()
>>> TanimotoSimilarity(bv[0], bv[1])
0.42
"""
if hasattr(self, "ifp"):
df = self.to_dataframe()
return to_bitvectors(df)
raise AttributeError("Please use the `run` method before")