Source code for pyKVFinder.grid.detect

import os
from typing import Optional

import numpy

__all__ = ["detect"]

from .geometry import _get_dimensions, _get_sincos


[docs] def detect( atomic: numpy.ndarray | list[list[str | float | int]], vertices: numpy.ndarray | list[list[float]], step: float | int = 0.6, probe_in: float | int = 1.4, probe_out: float | int = 4.0, removal_distance: float | int = 2.4, volume_cutoff: float | int = 5.0, latomic: Optional[numpy.ndarray | list[list[float]]] = None, ligand_cutoff: float | int = 5.0, box_adjustment: bool = False, surface: str = "SES", nthreads: int | None = None, verbose: bool = False, ) -> tuple[int, numpy.ndarray]: """Detects biomolecular cavities. Cavity points that belongs to the same cavity are assigned with an integer in the grid. Parameters ---------- atomic : numpy.ndarray | list[list[Union[str, float, int]]] A numpy array with atomic data (residue number, chain, residue name, atom name, xyz coordinates and radius) for each atom. vertices : numpy.ndarray | list[list[float]] A numpy.ndarray or a list with xyz vertices coordinates (origin, X-axis, Y-axis, Z-axis). step : float | int, optional Grid spacing (A), by default 0.6. probe_in : float | int, optional Probe In size (A), by default 1.4. probe_out : float | int, optional Probe Out size (A), by default 4.0. removal_distance : float | int, optional A length to be removed from the cavity-bulk frontier (A), by default 2.4. volume_cutoff : float | int, optional Volume filter for detected cavities (A3), by default 5.0. latomic : numpy.ndarray | list[list[Union[str, float, int]]], optional A numpy array with atomic data (residue number, chain, residue name, atom name, xyz coordinates and radius) for each atom of a target ligand, by default None. ligand_cutoff : float | int, optional A radius to limit a space around a ligand (A), by default 5.0. box_adjustment : bool, optional Whether a custom 3D grid is applied, by default False. surface : str, optional Surface representation. Keywords options are SES (Solvent Excluded Surface) or SAS (Solvent Accessible Surface), by default SES. nthreads : int | None, optional Number of threads, by default None. If None, the number of threads is `os.cpu_count() - 1`. verbose : bool, optional Print extra information to standard output, by default False. Returns ------- ncav : int Number of cavities. cavities : numpy.ndarray Cavity points in the 3D grid (cavities[nx][ny][nz]). Cavities array has integer labels in each position, that are: * -1: bulk points; * 0: biomolecule points; * 1: empty space points; * >=2: cavity points. The empty space points are regions that do not meet the chosen volume cutoff to be considered a cavity. Raises ------ TypeError `atomic` must be a list or a numpy.ndarray. ValueError `atomic` has incorrect shape. It must be (n, 8). TypeError `vertices` must be a list or a numpy.ndarray. ValueError `vertices` has incorrect shape. It must be (4, 3). TypeError `step` must be a positive real number. ValueError `step` must be a positive real number. TypeError `probe_in` must be a non-negative real number. ValueError `probe_in` must be a non-negative real number. TypeError `probe_out` must be a non-negative real number. ValueError `probe_out` must be a non-negative real number. ValueError `probe_out` must be greater than `probe_in`. TypeError `removal_distance` must be a non-negative real number. ValueError `removal_distance` must be a non-negative real number. TypeError `volume_cutoff` must be a non-negative real number. ValueError `volume_cutoff` must be a non-negative real number. TypeError `latomic` must be a list, a numpy.ndarray or None. ValueError `latomic` has incorrect shape. It must be (n, 8). TypeError `ligand_cutoff` must be a positive real number. ValueError `ligand_cutoff` must be a positive real number. TypeError `box_adjustment` must be a boolean. TypeError `surface` must be a string. TypeError `nthreads` must be a positive integer. ValueError `nthreads` must be a positive integer. ValueError `surface` must be SAS or SES, not `surface`. See also -------- read_pdb read_xyz get_vertices get_vertices_from_file spatial depth constitutional export Warning ------- If you are using box adjustment mode, do not forget to set box_adjustment flag to True and read the box configuration file with 'get_vertices_from_file' function. Warning ------- If you are using ligand adjustment mode, do not forget to read ligand atom coordinates with 'read_pdb' or 'read_xyz' functions. Example ------- With the grid vertices defined with ``get_vertices`` and atomic data loaded with ``read_pdb`` or ``read_xyz``, we can detect cavities on the whole target biomolecule: >>> from pyKVFinder import detect >>> ncav, cavities = detect(atomic, vertices) >>> ncav 18 >>> cavities array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) However, users may opt to perform cavity detection in a segmented space through ligand adjustment and/or box adjustment modes. The cavity detection can be limited around the target ligand(s), which will be passed to pyKVFinder through a *.pdb* or a *.xyz* files. Thus, the detected cavities are limited within a radius (``ligand_cutoff``) of the target ligand(s). >>> import os >>> ligand = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', 'ADN.pdb') >>> from pyKVFinder import read_pdb >>> latomic = read_pdb(ligand) >>> ncav, cavities = detect(atomic, vertices, latomic=latomic, ligand_cutoff=5.0) >>> ncav 2 >>> cavities array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) Further, we can also perform cavity detection on a custom 3D grid, where we can explore closed regions with a custom box, which can be defined by a *.toml* file (see `Box configuration file template`). >>> import os >>> fn = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', 'custom-box.toml') >>> with open(fn, 'r') as f: ... print(f.read()) [box] p1 = [3.11, 7.34, 1.59] p2 = [11.51, 7.34, 1.59] p3 = [3.11, 10.74, 1.59] p4 = [3.11, 7.34, 6.19] With this box adjustment mode, we must defined the 3D grid with ``get_vertices_from_file``. >>> from pyKVFinder import get_vertices_from_file >>> vertices, atomic = get_vertices_from_file(fn, atomic) Then, we can perform cavity detection: >>> ncav, cavities = detect(atomic, vertices, box_adjustment=True) >>> ncav 1 >>> cavities array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) .. warning:: If you are using box adjusment mode, do not forget to set ``box_adjustment`` flag to ``True``. """ from pyKVFinder._pyKVFinder import _detect, _detect_ladj # Check arguments if type(atomic) not in [numpy.ndarray, list]: raise TypeError("`atomic` must be a list or a numpy.ndarray.") elif len(numpy.asarray(atomic).shape) != 2: raise ValueError("`atomic` has incorrect shape. It must be (n, 8).") elif numpy.asarray(atomic).shape[1] != 8: raise ValueError("`atomic` has incorrect shape. It must be (n, 8).") if type(vertices) not in [numpy.ndarray, list]: raise TypeError("`vertices` must be a list or a numpy.ndarray.") elif numpy.asarray(vertices).shape != (4, 3): raise ValueError("`vertices` has incorrect shape. It must be (4, 3).") if type(step) not in [float, int]: raise TypeError("`step` must be a positive real number.") elif step <= 0.0: raise ValueError("`step` must be a positive real number.") if type(probe_in) not in [float, int]: raise TypeError("`probe_in` must be a non-negative real number.") elif probe_in < 0.0: raise ValueError("`probe_in` must be a non-negative real number.") if type(probe_out) not in [float, int]: raise TypeError("`probe_out` must be a non-negative real number.") elif probe_out < 0.0: raise ValueError("`probe_out` must be a non-negative real number.") elif probe_out < probe_in: raise ValueError("`probe_out` must be greater than `probe_in`.") if type(removal_distance) not in [float, int]: raise TypeError("`removal_distance` must be a non-negative real number.") elif removal_distance < 0.0: raise ValueError("`removal_distance` must be a non-negative real number.") if type(volume_cutoff) not in [float, int]: raise TypeError("`volume_cutoff` must be a non-negative real number.") elif volume_cutoff < 0.0: raise ValueError("`volume_cutoff` must be a non-negative real number.") if latomic is not None: if type(latomic) not in [numpy.ndarray, list]: raise TypeError("`latomic` must be a list, a numpy.ndarray or None.") if len(numpy.asarray(latomic).shape) != 2: raise ValueError("`latomic` has incorrect shape. It must be (n, 8).") elif numpy.asarray(latomic).shape[1] != 8: raise ValueError("`latomic` has incorrect shape. It must be (n, 8).") if type(ligand_cutoff) not in [float, int]: raise TypeError("`ligand_cutoff` must be a positive real number.") elif ligand_cutoff <= 0.0: raise ValueError("`ligand_cutoff` must be a positive real number.") if type(box_adjustment) not in [bool]: raise TypeError("`box_adjustment` must be a boolean.") if type(surface) not in [str]: raise TypeError("`surface` must be a string.") if nthreads is None: nthreads = os.cpu_count() - 1 else: if type(nthreads) not in [int]: raise TypeError("`nthreads` must be a positive integer.") elif nthreads <= 0: raise ValueError("`nthreads` must be a positive integer.") if type(verbose) not in [bool]: raise TypeError("`verbose` must be a boolean.") # Convert types if isinstance(atomic, list): atomic = numpy.asarray(atomic) if isinstance(vertices, list): vertices = numpy.asarray(vertices) if isinstance(step, int): step = float(step) if isinstance(probe_in, int): probe_in = float(probe_in) if isinstance(probe_out, int): probe_out = float(probe_out) if isinstance(removal_distance, int): removal_distance = float(removal_distance) if isinstance(volume_cutoff, int): volume_cutoff = float(volume_cutoff) if isinstance(ligand_cutoff, int): ligand_cutoff = float(ligand_cutoff) if isinstance(latomic, list): latomic = numpy.asarray(latomic) # Convert numpy.ndarray data types vertices = vertices.astype("float64") if vertices.dtype != "float64" else vertices # Extract xyzr from atomic xyzr = atomic[:, 4:].astype(numpy.float64) # Extract lxyzr from latomic if latomic is not None: lxyzr = latomic[:, 4:].astype(numpy.float64) # Define ligand adjustment mode ligand_adjustment = True if latomic is not None else False # Unpack vertices P1, P2, P3, P4 = vertices if surface == "SES": if verbose: print("> Surface representation: Solvent Excluded Surface (SES)") surface = True elif surface == "SAS": if verbose: print("> Surface representation: Solvent Accessible Surface (SAS)") surface = False else: raise ValueError(f"`surface` must be SAS or SES, not {surface}.") # Get sincos: sine and cossine of the grid rotation angles (sina, cosa, sinb, cosb) sincos = _get_sincos(vertices) # Get dimensions nx, ny, nz = _get_dimensions(vertices, step) # Calculate number of voxels nvoxels = nx * ny * nz # Detect cavities if ligand_adjustment: ncav, cavities = _detect_ladj( nvoxels, nx, ny, nz, xyzr, lxyzr, P1, sincos, step, probe_in, probe_out, removal_distance, volume_cutoff, ligand_adjustment, ligand_cutoff, box_adjustment, P2, surface, nthreads, verbose, ) else: ncav, cavities = _detect( nvoxels, nx, ny, nz, xyzr, P1, sincos, step, probe_in, probe_out, removal_distance, volume_cutoff, box_adjustment, P2, surface, nthreads, verbose, ) return ncav, cavities.reshape(nx, ny, nz)