Source code for pyKVFinder.main

import logging
import os
import pathlib
import time
from datetime import datetime
from typing import Any, Dict, List, Tuple, Optional, Union

import numpy

from .argparser import argparser
from .grid import (
    _get_dimensions,
    _get_sincos,
    constitutional,
    depth,
    detect,
    export,
    get_vertices,
    get_vertices_from_file,
    hydropathy,
    spatial,
)
from .utils import (
    _write_parameters,
    calculate_frequencies,
    plot_frequencies,
    read_pdb,
    read_vdw,
    read_xyz,
    write_results,
)

__all__ = ["run_workflow", "pyKVFinderResults", "Molecule"]

VDW = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data/vdw.dat")


def cli() -> None:
    """pyKVFinder Command Line Interface (CLI).

    Parameters
    ----------
    None

    Returns
    -------
    None

    Example
    -------
    Usage: pyKVFinder [-h] [-v] [--version] [-b <str>] [-O <str>]
                      [--nthreads <int>] [-d <str>] [-s <float>] [-i <float>]
                      [-o <float>] [-V <float>] [-R <float>] [-S <str>]
                      [--ignore_backbone] [-D] [--plot_frequencies]
                      [-B <.toml>] [-L (<.pdb> | <.xyz>)] [--ligand_cutoff <float>]
                      (<.pdb> | <.xyz>)
    """
    # Start time
    start_time = time.time()

    # Load pyKVFinder argument parser
    parser = argparser()

    # Parse command-line arguments
    args = parser.parse_args()

    # Get base name from input file if not defined by user
    if args.base_name is None:
        args.base_name = os.path.basename(
            args.input.replace(".pdb", "").replace(".xyz", "")
        )

    # Create output directory
    os.makedirs(args.output_directory, exist_ok=True)

    # Print message to stdout
    print(f"[PID {os.getpid()}] Running pyKVFinder for: {args.input}")

    # Start logging
    logging.basicConfig(
        filename=f"{os.path.join(args.output_directory, 'KVFinder.log')}",
        level=logging.INFO,
        format="%(message)s",
    )
    logging.info("=" * 80)
    logging.info(
        f"Date: {datetime.now().strftime('%a %d %B, %Y')}\nTime: {datetime.now().strftime('%H:%M:%S')}\n"
    )
    logging.info(f"[ Running pyKVFinder for: {args.input} ]")
    logging.info(f"> vdW radii file: {args.dictionary}")

    if args.verbose:
        print("> Loading atomic dictionary file")
    vdw = read_vdw(args.dictionary)

    if args.verbose:
        print("> Reading PDB coordinates")
    if args.input.endswith(".pdb"):
        atomic = read_pdb(args.input, vdw, args.model)
    elif args.input.endswith(".xyz"):
        atomic = read_xyz(args.input, vdw)

    if args.ligand:
        if args.verbose:
            print("> Reading ligand coordinates")
        if args.ligand.endswith(".pdb"):
            latomic = read_pdb(args.ligand, vdw)
        elif args.ligand.endswith(".xyz"):
            latomic = read_xyz(args.ligand, vdw)
    else:
        latomic = None

    if args.verbose:
        print("> Calculating 3D grid dimensions")
    if args.box:
        # Get vertices from file
        args.vertices, atomic = get_vertices_from_file(
            args.box,
            atomic,
            args.step,
            args.probe_in,
            args.probe_out,
            args.nthreads,
        )

        # Set flag to boolean
        args.box = True
    else:
        # Get vertices from input
        args.vertices = get_vertices(atomic, args.probe_out, args.step)

        # Set flag to boolean
        args.box = False

    # Calculate distance between points
    nx, ny, nz = _get_dimensions(args.vertices, args.step)

    # Calculate sin and cos of angles a and b
    args.sincos = _get_sincos(args.vertices)

    if args.verbose:
        print(f"p1: {args.vertices[0]}")
        print(f"p2: {args.vertices[1]}")
        print(f"p3: {args.vertices[2]}")
        print(f"p4: {args.vertices[3]}")
        print(f"Dimensions: (nx:{nx}, ny:{ny}, nz:{nz})")
        print(f"sina: {args.sincos[0]:.2f}\tsinb: {args.sincos[2]:.2f}")
        print(f"cosa: {args.sincos[1]:.2f}\tcosb: {args.sincos[3]:.2f}")

    # Logging parameters
    logging.info(f"> Step: {args.step} \u00c5")
    logging.info(f"> Probe In: {args.probe_in} \u00c5")
    logging.info(f"> Probe Out: {args.probe_out} \u00c5")
    logging.info(f"> Voxel volume: {args.step * args.step * args.step} \u00c5\u00b3")
    logging.info(f"> p1: {args.vertices[0]}")
    logging.info(f"> p2: {args.vertices[1]}")
    logging.info(f"> p3: {args.vertices[2]}")
    logging.info(f"> p4: {args.vertices[3]}")
    logging.info(f"> Dimensions: (nx:{nx}, ny:{ny}, nz:{nz})")
    logging.info(f"> sina: {args.sincos[0]:.2f}\tcosa: {args.sincos[1]:.2f}")
    logging.info(f"> sinb: {args.sincos[2]:.2f}\tcosb: {args.sincos[3]:.2f}")

    # Cavity detection
    ncav, cavities = detect(
        atomic,
        args.vertices,
        args.step,
        args.probe_in,
        args.probe_out,
        args.removal_distance,
        args.volume_cutoff,
        latomic,
        args.ligand_cutoff,
        args.box,
        args.surface,
        args.nthreads,
        args.verbose,
    )

    # Cavities were found
    if ncav > 0:
        # Spatial characterization
        surface, volume, area = spatial(
            cavities, args.step, None, args.nthreads, args.verbose
        )

        # Constitutional characterization
        residues = constitutional(
            cavities,
            atomic,
            args.vertices,
            args.step,
            args.probe_in,
            args.ignore_backbone,
            None,
            args.nthreads,
            args.verbose,
        )
        frequencies = calculate_frequencies(residues)

        # Depth characterization
        if args.depth:
            depths, max_depth, avg_depth = depth(
                cavities, args.step, None, args.nthreads, args.verbose
            )
        else:
            depths, max_depth, avg_depth = None, None, None

        # Plot bar charts of frequencies
        if args.plot_frequencies:
            output_plot = os.path.join(
                args.output_directory, f"{args.base_name}.barplot.pdf"
            )
            plot_frequencies(frequencies, output_plot)

        # Hydropathy characterization
        if args.hydropathy:
            # Map hydrophobicity scales
            scales, avg_hydropathy = hydropathy(
                surface,
                atomic,
                args.vertices,
                args.step,
                args.probe_in,
                args.hydropathy,
                args.ignore_backbone,
                None,
                args.nthreads,
                args.verbose,
            )
        else:
            scales, avg_hydropathy = None, None

        # Export cavities
        output_cavity = os.path.join(
            args.output_directory, f"{args.base_name}.KVFinder.output.pdb"
        )
        export(
            output_cavity,
            cavities,
            surface,
            args.vertices,
            args.step,
            depths,
            scales,
            None,
            args.nthreads,
        )

        # Write results
        output_results = os.path.join(
            args.output_directory, f"{args.base_name}.KVFinder.results.toml"
        )
        write_results(
            output_results,
            args.input,
            args.ligand,
            output_cavity,
            volume,
            area,
            max_depth,
            avg_depth,
            avg_hydropathy,
            residues,
            frequencies,
            args.step,
        )

        # Write parameters
        _write_parameters(args)
    else:
        print("> No cavities detected!")

    # Elapsed time
    elapsed_time = time.time() - start_time
    print(f"[ \033[1mElapsed time:\033[0m {elapsed_time:.4f}s ]")
    logging.info(f"[ Elapsed time (s): {elapsed_time:.4f}s ]\n")

    return 0


[docs]class pyKVFinderResults(object): """A class containing pyKVFinder detection and characterization results. Parameters ---------- cavities : numpy.ndarray Cavity points in the 3D grid (cavities[nx][ny][nz]). Cavities array has integer labels in each position, that are: * -1: bulk points; * 0: biomolecule points; * 1: empty space points; * >=2: cavity points. The empty space points are regions that do not meet the chosen volume cutoff to be considered a cavity. surface : numpy.ndarray Surface points in the 3D grid (surface[nx][ny][nz]). Surface array has integer labels in each position, that are: * -1: bulk points; * 0: biomolecule or empty space points; * >=2: surface points. The empty space points are regions that do not meet the chosen volume cutoff to be considered a cavity. depths : numpy.ndarray, optional A numpy.ndarray with depth of cavity points (depth[nx][ny][nz]). scales : numpy.ndarray, optional A numpy.ndarray with hydrophobicity scale value mapped at surface points (scales[nx][ny][nz]). volume : Dict[str, float] A dictionary with volume of each detected cavity. area : Dict[str, float] A dictionary with area of each detected cavity. max_depth : Dict[str, float], optional A dictionary with maximum depth of each detected cavity. avg_depth : Dict[str, float], optional A dictionary with average depth of each detected cavity. avg_hydropathy : Dict[str, float], optional A dictionary with average hydropathy for each detected cavity and the range of the hydrophobicity scale (min, max). residues: Dict[str, List[List[str]]] A dictionary with a list of interface residues for each detected cavity. frequencies : Dict[str, Dict[str, Dict[str, int]]], optional A dictionary with frequencies of residues and class for residues of each detected cavity. _vertices : numpy.ndarray A numpy.ndarray or a list with xyz vertices coordinates (origin, X-axis, Y-axis, Z-axis). _step : float Grid spacing (A). _input : Union[str, pathlib.Path], optional A path to input PDB or XYZ file, by default None. _ligand : Union[str, pathlib.Path], optional A path to ligand PDB or XYZ file, by default None. Attributes ---------- cavities : numpy.ndarray Cavity points in the 3D grid (cavities[nx][ny][nz]). Cavities array has integer labels in each position, that are: * -1: bulk points; * 0: biomolecule points; * 1: empty space points; * >=2: cavity points. The empty space points are regions that do not meet the chosen volume cutoff to be considered a cavity. surface : numpy.ndarray Surface points in the 3D grid (surface[nx][ny][nz]). Surface array has integer labels in each position, that are: * -1: bulk points; * 0: biomolecule or empty space points; * >=2: surface points. The empty space points are regions that do not meet the chosen volume cutoff to be considered a cavity. depths : numpy.ndarray, optional A numpy.ndarray with depth of cavity points (depth[nx][ny][nz]). scales : numpy.ndarray, optional A numpy.ndarray with hydrophobicity scale value mapped at surface points (scales[nx][ny][nz]). ncav : int Number of cavities. volume : Dict[str, float] A dictionary with volume of each detected cavity. area : Dict[str, float] A dictionary with area of each detected cavity. max_depth : Dict[str, float], optional A dictionary with maximum depth of each detected cavity. avg_depth : Dict[str, float], optional A dictionary with average depth of each detected cavity. avg_hydropathy : Dict[str, float], optional A dictionary with average hydropathy for each detected cavity and the range of the hydrophobicity scale (min, max). residues: Dict[str, List[List[str]]] A dictionary with a list of interface residues for each detected cavity. frequencies : Dict[str, Dict[str, Dict[str, int]]], optional A dictionary with frequencies of residues and class for residues of each detected cavity. _vertices : numpy.ndarray A numpy.ndarray or a list with xyz vertices coordinates (origin, X-axis, Y-axis, Z-axis). _step : float Grid spacing (A). _input : Union[str, pathlib.Path], optional A path to input PDB or XYZ file, by default None. _ligand : Union[str, pathlib.Path], optional A path to ligand PDB or XYZ file, by default None. """ def __init__( self, cavities: numpy.ndarray, surface: numpy.ndarray, depths: Optional[numpy.ndarray], scales: Optional[numpy.ndarray], volume: Dict[str, float], area: Dict[str, float], max_depth: Optional[Dict[str, float]], avg_depth: Optional[Dict[str, float]], avg_hydropathy: Optional[Dict[str, float]], residues: Dict[str, List[List[str]]], frequencies: Optional[Dict[str, Dict[str, Dict[str, int]]]], _vertices: numpy.ndarray, _step: Union[float, int], _input: Optional[Union[str, pathlib.Path]] = None, _ligand: Optional[Union[str, pathlib.Path]] = None, ): self.cavities = cavities self.surface = surface self.depths = depths self.scales = scales self.volume = volume self.ncav = cavities.max() - 1 self.area = area self.max_depth = max_depth self.avg_depth = avg_depth self.avg_hydropathy = avg_hydropathy self.residues = residues self.frequencies = frequencies self._vertices = _vertices self._step = _step self._input = os.path.abspath(_input) self._ligand = os.path.abspath(_ligand) if _ligand else None def __repr__(self): return "<pyKVFinderResults object>"
[docs] def export( self, output: Union[str, pathlib.Path] = "cavity.pdb", nthreads: Optional[int] = None, ) -> None: """Exports cavitiy (H) and surface (HA) points to PDB-formatted file with a variable (B; optional) in B-factor column, and hydropathy to PDB-formatted file in B-factor column at surface points (HA). Parameters ---------- output : Union[str, pathlib.Path]), optional A path to PDB file for writing cavities, by default `cavity.pdb`. nthreads : int, optional Number of threads, by default None. If None, the number of threads is `os.cpu_count() - 1`. Note ---- The cavity nomenclature is based on the integer label. The cavity marked with 2, the first integer corresponding to a cavity, is KAA, the cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on. Example ------- >>> import os >>> import pyKVFinder >>> pdb = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', '1FMO.pdb') >>> results = pyKVFinder.run_workflow(pdb) >>> results.export() """ export( output, self.cavities, self.surface, self._vertices, self._step, self.depths, self.scales, None, nthreads, )
[docs] def write( self, fn: Union[str, pathlib.Path] = "results.toml", output: Optional[Union[str, pathlib.Path]] = None ) -> None: """ Writes file paths and cavity characterization to TOML-formatted file Parameters ---------- fn : Union[str, pathlib.Path], optional A path to TOML-formatted file for writing file paths and cavity characterization (volume, area, depth and interface residues) per cavity detected, by default `results.toml`. output : Union[str, pathlib.Path], optional A path to a cavity PDB file, by default None. Note ---- The cavity nomenclature is based on the integer label. The cavity marked with 2, the first integer corresponding to a cavity, is KAA, the cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on. Example ------- >>> import os >>> import pyKVFinder >>> pdb = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', '1FMO.pdb') >>> results = pyKVFinder.run_workflow(pdb) >>> results.write() """ write_results( fn, self._input, self._ligand, output, self.volume, self.area, self.max_depth, self.avg_depth, self.avg_hydropathy, self.residues, self.frequencies, self._step, )
[docs] def plot_frequencies(self, pdf: Union[str, pathlib.Path] = "barplots.pdf"): """Plot bar charts of frequencies (residues and classes of residues) in a PDF file. Parameters ---------- pdf : Union[str, pathlib.Path], optional A path to a PDF file, by default `barplots.pdf`. Note ---- The cavity nomenclature is based on the integer label. The cavity marked with 2, the first integer corresponding to a cavity, is KAA, the cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on. Note ---- The classes of residues are: * Aliphatic apolar (R1): Alanine, Glycine, Isoleucine, Leucine, Methionine, Valine. * Aromatic (R2): Phenylalanine, Tryptophan, Tyrosine. * Polar Uncharged (R3): Asparagine, Cysteine, Glutamine, Proline, Serine, Threonine. * Negatively charged (R4): Aspartate, Glutamate. * Positively charged (R5): Arginine, Histidine, Lysine. * Non-standard (RX): Non-standard residues Example ------- >>> import os >>> import pyKVFinder >>> pdb = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', '1FMO.pdb') >>> results = pyKVFinder.run_workflow(pdb) >>> results.plot_frequencies() """ plot_frequencies(self.frequencies, pdf)
[docs] def export_all( self, fn: Union[str, pathlib.Path] = "results.toml", output: Union[str, pathlib.Path] = "cavity.pdb", include_frequencies_pdf: bool = False, pdf: Union[str, pathlib.Path] = "barplots.pdf", nthreads: Optional[int] = None, ) -> None: """Exports cavities and characterization to PDB-formatted files, writes file paths and characterization to a TOML-formatted file, and optionally plot bar charts of frequencies (residues and classes of residues) in a PDF file. Parameters ---------- fn : Union[str, pathlib.Path], optional A path to TOML-formatted file for writing file paths and cavity characterization (volume, area and interface residues) per cavity detected, by default `results.toml`. output : Union[str, pathlib.Path], optional A path to PDB file for writing cavities, by default `cavity.pdb`. include_frequencies_pdf : bool, optional Whether to plot frequencies (residues and classes of residues) to PDF file, by default False. pdf : Union[str, pathlib.Path], optional A path to a PDF file, by default `barplots.pdf`. nthreads : int, optional Number of threads, by default None. If None, the number of threads is `os.cpu_count() - 1`. Note ---- The cavity nomenclature is based on the integer label. The cavity marked with 2, the first integer corresponding to a cavity, is KAA, the cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on. Note ---- The classes of residues are: * Aliphatic apolar (R1): Alanine, Glycine, Isoleucine, Leucine, Methionine, Valine. * Aromatic (R2): Phenylalanine, Tryptophan, Tyrosine. * Polar Uncharged (R3): Asparagine, Cysteine, Glutamine, Proline, Serine, Threonine. * Negatively charged (R4): Aspartate, Glutamate. * Positively charged (R5): Arginine, Histidine, Lysine. * Non-standard (RX): Non-standard residues. Example ------- >>> import os >>> import pyKVFinder >>> pdb = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', '1FMO.pdb') >>> results = pyKVFinder.run_workflow(pdb) >>> results.export_all() Yet, we can set a ``include_frequencies_pdf`` flag to True to plot the bar charts of the frequencies in a PDF file. >>> results.export_all(include_frequencies_pdf=True) """ # Export cavity PDB file self.export(output, nthreads) # Write KVFinder results TOML self.write(fn, output) # Plot bar charts of frequencies if include_frequencies_pdf: self.plot_frequencies(pdf)
[docs]def run_workflow( input: Union[str, pathlib.Path], ligand: Optional[Union[str, pathlib.Path]] = None, vdw: Optional[Union[str, pathlib.Path]] = None, box: Optional[Union[str, pathlib.Path]] = None, step: Union[float, int] = 0.6, probe_in: Union[float, int] = 1.4, probe_out: Union[float, int] = 4.0, removal_distance: Union[float, int] = 2.4, volume_cutoff: Union[float, int] = 5.0, ligand_cutoff: Union[float, int] = 5.0, include_depth: bool = False, include_hydropathy: bool = False, hydrophobicity_scale: Union[str, pathlib.Path] = "EisenbergWeiss", surface: str = "SES", ignore_backbone: bool = False, model: Optional[int] = None, nthreads: Optional[int] = None, verbose: bool = False, ) -> pyKVFinderResults: """Detects and characterizes cavities (volume, area, depth [optional], hydropathy [optional] and interface residues). Parameters ---------- input : Union[str, pathlib.Path] A path to a target structure file, in PDB or XYZ format, to detect and characterize cavities. ligand : Union[str, pathlib.Path], optional A path to ligand file, in PDB or XYZ format, by default None. vdw : Union[str, pathlib.Path], optional A path to a van der Waals radii file, by default None. If None, apply the built-in van der Waals radii file: `vdw.dat`. box : Union[str, pathlib.Path], optional A path to box configuration file (TOML-formatted), by default None. step : Union[float, int], optional Grid spacing (A), by default 0.6. probe_in : Union[float, int], optional Probe In size (A), by default 1.4. probe_out : Union[float, int], optional Probe Out size (A), by default 4.0. removal_distance : Union[float, int], optional Length to be removed from the cavity-bulk frontier (A), by default 2.4. volume_cutoff : Union[float, int], optional Cavities volume filter (A3), by default 5.0. ligand_cutoff : Union[float, int], optional Radius value to limit a space around a ligand (A), by default 5.0. include_depth : bool, optional Whether to characterize the depth of the detected cavities, by default False. include_hydropathy : bool, optional Whether to characterize the hydropathy of the detected cavities, by default False. hydrophobicity_scale : Union[str, pathlib.Path], optional Name of a built-in hydrophobicity scale (EisenbergWeiss, HessaHeijne, KyteDoolitte, MoonFleming, RadzickaWolfenden, WimleyWhite, ZhaoLondon) or a path to a TOML-formatted file with a custom hydrophobicity scale, by default `EisenbergWeiss`. surface : str, optional Keywords options are SES (Solvent Excluded Surface) or SAS (Solvent Accessible Surface), by default SES. ignore_backbone : bool, optional Whether to ignore backbone atoms (C, CA, N, O) when defining interface residues, by default False. model : int, optional Model number, by default None. If None, keep atoms from all models. nthreads : int, optional Number of threads, by default None. If None, the number of threads is `os.cpu_count() - 1`. verbose : bool, optional Print extra information to standard output, by default False. Returns ------- results : pyKVFinderResults A class with the following attributes defined: * cavities : numpy.ndarray Cavity points in the 3D grid (cavities[nx][ny][nz]). Cavities array has integer labels in each position, that are: * -1: bulk points; * 0: biomolecule points; * 1: empty space points; * >=2: cavity points. The empty space points are regions that do not meet the chosen volume cutoff to be considered a cavity. * surface : numpy.ndarray Surface points in the 3D grid (surface[nx][ny][nz]). Surface array has integer labels in each position, that are: * -1: bulk points; * 0: biomolecule or empty space points; * >=2: surface points. The empty space points are regions that do not meet the chosen volume cutoff to be considered a cavity. * depths : numpy.ndarray, optional A numpy.ndarray with depth of cavity points (depth[nx][ny][nz]). * scales : numpy.ndarray, optional A numpy.ndarray with hydrophobicity scale value mapped at surface points (scales[nx][ny][nz]). * ncav : int Number of cavities. * volume : Dict[str, float] A dictionary with volume of each detected cavity. * area : Dict[str, float] A dictionary with area of each detected cavity. * max_depth : Dict[str, float], optional A dictionary with maximum depth of each detected cavity. * avg_depth : Dict[str, float], optional A dictionary with average depth of each detected cavity. * avg_hydropathy : Dict[str, float], optional A dictionary with average hydropathy for each detected cavity and the range of the hydrophobicity scale (min, max). * residues: Dict[str, List[List[str]]] A dictionary with a list of interface residues for each detected cavity. * frequencies : Dict[str, Dict[str, Dict[str, int]]], optional A dictionary with frequencies of residues and class for residues of each detected cavity. * _vertices : numpy.ndarray A numpy.ndarray or a list with xyz vertices coordinates (origin, X-axis, Y-axis, Z-axis). * _step : float Grid spacing (A). * _input : Union[str, pathlib.Path], optional A path to input PDB or XYZ file. * _ligand : Union[str, pathlib.Path], optional A path to ligand PDB or XYZ file. Raises ------ TypeError `input` must have .pdb or .xyz extension. TypeError `ligand` must have .pdb or .xyz extension. Note ---- The cavity nomenclature is based on the integer label. The cavity marked with 2, the first integer corresponding to a cavity, is KAA, the cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on. Note ---- The classes of residues are: * Aliphatic apolar (R1): Alanine, Glycine, Isoleucine, Leucine, Methionine, Valine. * Aromatic (R2): Phenylalanine, Tryptophan, Tyrosine. * Polar Uncharged (R3): Asparagine, Cysteine, Glutamine, Proline, Serine, Threonine. * Negatively charged (R4): Aspartate, Glutamate. * Positively charged (R5): Arginine, Histidine, Lysine. * Non-standard (RX): Non-standard residues. See Also -------- pyKVFinderResults Example ------- The **standard workflow** for cavity detection with spatial (surface points, volume, area) and constitutional (interface residues and their frequencies) characterization can be run at once with one command: >>> import os >>> import pyKVFinder >>> pdb = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', '1FMO.pdb') >>> results = pyKVFinder.run_workflow(pdb) >>> results <pyKVFinderResults object> >>> results.cavities array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) >>> results.surface array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) >>> results.ncav >>> 18 >>> results.volume {'KAA': 137.16, 'KAB': 47.52, 'KAC': 66.96, 'KAD': 8.21, 'KAE': 43.63, 'KAF': 12.53, 'KAG': 6.26, 'KAH': 520.13, 'KAI': 12.31, 'KAJ': 26.57, 'KAK': 12.31, 'KAL': 33.91, 'KAM': 23.11, 'KAN': 102.82, 'KAO': 6.05, 'KAP': 15.55, 'KAQ': 7.99, 'KAR': 7.78} >>> results.area {'KAA': 126.41, 'KAB': 62.37, 'KAC': 74.57, 'KAD': 19.06, 'KAE': 57.08, 'KAF': 22.77, 'KAG': 15.38, 'KAH': 496.97, 'KAI': 30.58, 'KAJ': 45.64, 'KAK': 30.58, 'KAL': 45.58, 'KAM': 45.25, 'KAN': 129.77, 'KAO': 12.28, 'KAP': 25.04, 'KAQ': 13.46, 'KAR': 16.6} >>> results.residues {'KAA': [['14', 'E', 'SER'], ['15', 'E', 'VAL'], ['18', 'E', 'PHE'], ['19', 'E', 'LEU'], ['100', 'E', 'PHE'], ['152', 'E', 'LEU'], ['155', 'E', 'GLU'], ['156', 'E', 'TYR'], ['292', 'E', 'LYS'], ['302', 'E', 'TRP'], ['303', 'E', 'ILE'], ['306', 'E', 'TYR']], 'KAB': [['18', 'E', 'PHE'], ['22', 'E', 'ALA'], ['25', 'E', 'ASP'], ['26', 'E', 'PHE'], ['29', 'E', 'LYS'], ['97', 'E', 'ALA'], ['98', 'E', 'VAL'], ['99', 'E', 'ASN'], ['156', 'E', 'TYR']], 'KAC': [['141', 'E', 'PRO'], ['142', 'E', 'HIS'], ['144', 'E', 'ARG'], ['145', 'E', 'PHE'], ['148', 'E', 'ALA'], ['299', 'E', 'THR'], ['300', 'E', 'THR'], ['305', 'E', 'ILE'], ['310', 'E', 'VAL'], ['311', 'E', 'GLU'], ['313', 'E', 'PRO']], 'KAD': [['122', 'E', 'TYR'], ['124', 'E', 'ALA'], ['176', 'E', 'GLN'], ['318', 'E', 'PHE'], ['320', 'E', 'GLY'], ['321', 'E', 'PRO'], ['322', 'E', 'GLY'], ['323', 'E', 'ASP']], 'KAE': [['95', 'E', 'LEU'], ['98', 'E', 'VAL'], ['99', 'E', 'ASN'], ['100', 'E', 'PHE'], ['103', 'E', 'LEU'], ['104', 'E', 'VAL'], ['105', 'E', 'LYS'], ['106', 'E', 'LEU']], 'KAF': [['123', 'E', 'VAL'], ['124', 'E', 'ALA'], ['175', 'E', 'ASP'], ['176', 'E', 'GLN'], ['181', 'E', 'GLN']], 'KAG': [['34', 'E', 'SER'], ['37', 'E', 'THR'], ['96', 'E', 'GLN'], ['106', 'E', 'LEU'], ['107', 'E', 'GLU'], ['108', 'E', 'PHE'], ['109', 'E', 'SER']], 'KAH': [['49', 'E', 'LEU'], ['50', 'E', 'GLY'], ['51', 'E', 'THR'], ['52', 'E', 'GLY'], ['53', 'E', 'SER'], ['54', 'E', 'PHE'], ['55', 'E', 'GLY'], ['56', 'E', 'ARG'], ['57', 'E', 'VAL'], ['70', 'E', 'ALA'], ['72', 'E', 'LYS'], ['74', 'E', 'LEU'], ['84', 'E', 'GLN'], ['87', 'E', 'HIS'], ['88', 'E', 'THR'], ['91', 'E', 'GLU'], ['104', 'E', 'VAL'], ['120', 'E', 'MET'], ['121', 'E', 'GLU'], ['122', 'E', 'TYR'], ['123', 'E', 'VAL'], ['127', 'E', 'GLU'], ['166', 'E', 'ASP'], ['168', 'E', 'LYS'], ['170', 'E', 'GLU'], ['171', 'E', 'ASN'], ['173', 'E', 'LEU'], ['183', 'E', 'THR'], ['184', 'E', 'ASP'], ['186', 'E', 'GLY'], ['187', 'E', 'PHE'], ['201', 'E', 'THR'], ['327', 'E', 'PHE']], 'KAI': [['131', 'E', 'HIS'], ['138', 'E', 'PHE'], ['142', 'E', 'HIS'], ['146', 'E', 'TYR'], ['174', 'E', 'ILE'], ['314', 'E', 'PHE']], 'KAJ': [['33', 'E', 'PRO'], ['89', 'E', 'LEU'], ['92', 'E', 'LYS'], ['93', 'E', 'ARG'], ['96', 'E', 'GLN'], ['349', 'E', 'GLU'], ['350', 'E', 'PHE']], 'KAK': [['157', 'E', 'LEU'], ['162', 'E', 'LEU'], ['163', 'E', 'ILE'], ['164', 'E', 'TYR'], ['185', 'E', 'PHE'], ['188', 'E', 'ALA']], 'KAL': [['49', 'E', 'LEU'], ['127', 'E', 'GLU'], ['129', 'E', 'PHE'], ['130', 'E', 'SER'], ['326', 'E', 'ASN'], ['327', 'E', 'PHE'], ['328', 'E', 'ASP'], ['330', 'E', 'TYR']], 'KAM': [['51', 'E', 'THR'], ['55', 'E', 'GLY'], ['56', 'E', 'ARG'], ['73', 'E', 'ILE'], ['74', 'E', 'LEU'], ['75', 'E', 'ASP'], ['115', 'E', 'ASN'], ['335', 'E', 'ILE'], ['336', 'E', 'ARG']], 'KAN': [['165', 'E', 'ARG'], ['166', 'E', 'ASP'], ['167', 'E', 'LEU'], ['199', 'E', 'CYS'], ['200', 'E', 'GLY'], ['201', 'E', 'THR'], ['204', 'E', 'TYR'], ['205', 'E', 'LEU'], ['206', 'E', 'ALA'], ['209', 'E', 'ILE'], ['219', 'E', 'VAL'], ['220', 'E', 'ASP'], ['223', 'E', 'ALA']], 'KAO': [['48', 'E', 'THR'], ['51', 'E', 'THR'], ['56', 'E', 'ARG'], ['330', 'E', 'TYR'], ['331', 'E', 'GLU']], 'KAP': [['222', 'E', 'TRP'], ['238', 'E', 'PHE'], ['253', 'E', 'GLY'], ['254', 'E', 'LYS'], ['255', 'E', 'VAL'], ['273', 'E', 'LEU']], 'KAQ': [['207', 'E', 'PRO'], ['208', 'E', 'GLU'], ['211', 'E', 'LEU'], ['213', 'E', 'LYS'], ['275', 'E', 'VAL'], ['277', 'E', 'LEU']], 'KAR': [['237', 'E', 'PRO'], ['238', 'E', 'PHE'], ['249', 'E', 'LYS'], ['254', 'E', 'LYS'], ['255', 'E', 'VAL'], ['256', 'E', 'ARG']]} >>> results.frequencies {'KAA': {'RESIDUES': {'GLU': 1, 'ILE': 1, 'LEU': 2, 'LYS': 1, 'PHE': 2, 'SER': 1, 'TRP': 1, 'TYR': 2, 'VAL': 1}, 'CLASS': {'R1': 4, 'R2': 5, 'R3': 1, 'R4': 1, 'R5': 1, 'RX': 0}}, 'KAB': {'RESIDUES': {'ALA': 2, 'ASN': 1, 'ASP': 1, 'LYS': 1, 'PHE': 2, 'TYR': 1, 'VAL': 1}, 'CLASS': {'R1': 3, 'R2': 3, 'R3': 1, 'R4': 1, 'R5': 1, 'RX': 0}}, 'KAC': {'RESIDUES': {'ALA': 1, 'ARG': 1, 'GLU': 1, 'HIS': 1, 'ILE': 1, 'PHE': 1, 'PRO': 2, 'THR': 2, 'VAL': 1}, 'CLASS': {'R1': 5, 'R2': 1, 'R3': 2, 'R4': 1, 'R5': 2, 'RX': 0}}, 'KAD': {'RESIDUES': {'ALA': 1, 'ASP': 1, 'GLN': 1, 'GLY': 2, 'PHE': 1, 'PRO': 1, 'TYR': 1}, 'CLASS': {'R1': 4, 'R2': 2, 'R3': 1, 'R4': 1, 'R5': 0, 'RX': 0}}, 'KAE': {'RESIDUES': {'ASN': 1, 'LEU': 3, 'LYS': 1, 'PHE': 1, 'VAL': 2}, 'CLASS': {'R1': 5, 'R2': 1, 'R3': 1, 'R4': 0, 'R5': 1, 'RX': 0}}, 'KAF': {'RESIDUES': {'ALA': 1, 'ASP': 1, 'GLN': 2, 'VAL': 1}, 'CLASS': {'R1': 2, 'R2': 0, 'R3': 2, 'R4': 1, 'R5': 0, 'RX': 0}}, 'KAG': {'RESIDUES': {'GLN': 1, 'GLU': 1, 'LEU': 1, 'PHE': 1, 'SER': 2, 'THR': 1}, 'CLASS': {'R1': 1, 'R2': 1, 'R3': 4, 'R4': 1, 'R5': 0, 'RX': 0}}, 'KAH': {'RESIDUES': {'ALA': 1, 'ARG': 1, 'ASN': 1, 'ASP': 2, 'GLN': 1, 'GLU': 4, 'GLY': 4, 'HIS': 1, 'LEU': 3, 'LYS': 2, 'MET': 1, 'PHE': 3, 'SER': 1, 'THR': 4, 'TYR': 1, 'VAL': 3}, 'CLASS': {'R1': 11, 'R2': 4, 'R3': 8, 'R4': 6, 'R5': 4, 'RX': 0}}, 'KAI': {'RESIDUES': {'HIS': 2, 'ILE': 1, 'PHE': 2, 'TYR': 1}, 'CLASS': {'R1': 1, 'R2': 3, 'R3': 0, 'R4': 0, 'R5': 2, 'RX': 0}}, 'KAJ': {'RESIDUES': {'ARG': 1, 'GLN': 1, 'GLU': 1, 'LEU': 1, 'LYS': 1, 'PHE': 1, 'PRO': 1}, 'CLASS': {'R1': 2, 'R2': 1, 'R3': 1, 'R4': 1, 'R5': 2, 'RX': 0}}, 'KAK': {'RESIDUES': {'ALA': 1, 'ILE': 1, 'LEU': 2, 'PHE': 1, 'TYR': 1}, 'CLASS': {'R1': 4, 'R2': 2, 'R3': 0, 'R4': 0, 'R5': 0, 'RX': 0}}, 'KAL': {'RESIDUES': {'ASN': 1, 'ASP': 1, 'GLU': 1, 'LEU': 1, 'PHE': 2, 'SER': 1, 'TYR': 1}, 'CLASS': {'R1': 1, 'R2': 3, 'R3': 2, 'R4': 2, 'R5': 0, 'RX': 0}}, 'KAM': {'RESIDUES': {'ARG': 2, 'ASN': 1, 'ASP': 1, 'GLY': 1, 'ILE': 2, 'LEU': 1, 'THR': 1}, 'CLASS': {'R1': 4, 'R2': 0, 'R3': 2, 'R4': 1, 'R5': 2, 'RX': 0}}, 'KAN': {'RESIDUES': {'ALA': 2, 'ARG': 1, 'ASP': 2, 'CYS': 1, 'GLY': 1, 'ILE': 1, 'LEU': 2, 'THR': 1, 'TYR': 1, 'VAL': 1}, 'CLASS': {'R1': 7, 'R2': 1, 'R3': 2, 'R4': 2, 'R5': 1, 'RX': 0}}, 'KAO': {'RESIDUES': {'ARG': 1, 'GLU': 1, 'THR': 2, 'TYR': 1}, 'CLASS': {'R1': 0, 'R2': 1, 'R3': 2, 'R4': 1, 'R5': 1, 'RX': 0}}, 'KAP': {'RESIDUES': {'GLY': 1, 'LEU': 1, 'LYS': 1, 'PHE': 1, 'TRP': 1, 'VAL': 1}, 'CLASS': {'R1': 3, 'R2': 2, 'R3': 0, 'R4': 0, 'R5': 1, 'RX': 0}}, 'KAQ': {'RESIDUES': {'GLU': 1, 'LEU': 2, 'LYS': 1, 'PRO': 1, 'VAL': 1}, 'CLASS': {'R1': 4, 'R2': 0, 'R3': 0, 'R4': 1, 'R5': 1, 'RX': 0}}, 'KAR': {'RESIDUES': {'ARG': 1, 'LYS': 2, 'PHE': 1, 'PRO': 1, 'VAL': 1}, 'CLASS': {'R1': 2, 'R2': 1, 'R3': 0, 'R4': 0, 'R5': 3, 'RX': 0}}} However, users may opt to perform cavity detection in a segmented space through ligand adjustment and/or box adjustment modes. The cavity detection can be limited around the target ligand(s), which will be passed to pyKVFinder through a *.pdb* or a *.xyz* files. Thus, the detected cavities are limited within a radius (``ligand_cutoff``) of the target ligand(s). >>> ligand = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', 'ADN.pdb') >>> results = pyKVFinder.run_workflow(pdb, ligand) >>> results <pyKVFinderResults object> >>> results.cavities array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) >>> results.surface array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) >>> results.ncav >>> 18 >>> results.volume {'KAA': 365.04, 'KAB': 16.85} >>> results.area {'KAA': 328.79, 'KAB': 23.15} >>> results.residues {'KAA': [['49', 'E', 'LEU'], ['50', 'E', 'GLY'], ['51', 'E', 'THR'], ['52', 'E', 'GLY'], ['53', 'E', 'SER'], ['55', 'E', 'GLY'], ['56', 'E', 'ARG'], ['57', 'E', 'VAL'], ['70', 'E', 'ALA'], ['72', 'E', 'LYS'], ['104', 'E', 'VAL'], ['120', 'E', 'MET'], ['121', 'E', 'GLU'], ['122', 'E', 'TYR'], ['123', 'E', 'VAL'], ['127', 'E', 'GLU'], ['166', 'E', 'ASP'], ['168', 'E', 'LYS'], ['170', 'E', 'GLU'], ['171', 'E', 'ASN'], ['173', 'E', 'LEU'], ['183', 'E', 'THR'], ['184', 'E', 'ASP'], ['327', 'E', 'PHE']], 'KAB': [['49', 'E', 'LEU'], ['127', 'E', 'GLU'], ['130', 'E', 'SER'], ['326', 'E', 'ASN'], ['327', 'E', 'PHE'], ['328', 'E', 'ASP'], ['330', 'E', 'TYR']]} >>> results.frequencies {'KAA': {'RESIDUES': {'ALA': 1, 'ARG': 1, 'ASN': 1, 'ASP': 2, 'GLU': 3, 'GLY': 3, 'LEU': 2, 'LYS': 2, 'MET': 1, 'PHE': 1, 'SER': 1, 'THR': 2, 'TYR': 1, 'VAL': 3}, 'CLASS': {'R1': 9, 'R2': 2, 'R3': 5, 'R4': 5, 'R5': 3, 'RX': 0}}, 'KAB': {'RESIDUES': {'ASN': 1, 'ASP': 1, 'GLU': 1, 'LEU': 1, 'PHE': 1, 'SER': 1, 'TYR': 1}, 'CLASS': {'R1': 1, 'R2': 2, 'R3': 2, 'R4': 2, 'R5': 0, 'RX': 0}}} Further, we can also perform cavity detection on a custom 3D grid, where we can explore closed regions with a custom box, which can be defined by a *.toml* file (see `Box configuration file template`). >>> fn = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', 'custom-box.toml') >>> with open(fn, 'r') as f: ... print(f.read()) [box] p1 = [3.11, 7.34, 1.59] p2 = [11.51, 7.34, 1.59] p3 = [3.11, 10.74, 1.59] p4 = [3.11, 7.34, 6.19] >>> results = pyKVFinder.run_workflow(pdb, box=fn) >>> results <pyKVFinderResults object> >>> results.cavities array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) >>> results.surface array([[[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], ..., [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], dtype=int32) >>> results.ncav >>> 1 >>> results.volume {'KAA': 115.78} >>> results.area {'KAA': 33.91} >>> results.residues {'KAA': [['49', 'E', 'LEU'], ['50', 'E', 'GLY'], ['51', 'E', 'THR'], ['57', 'E', 'VAL'], ['70', 'E', 'ALA'], ['104', 'E', 'VAL'], ['121', 'E', 'GLU'], ['122', 'E', 'TYR'], ['123', 'E', 'VAL'], ['127', 'E', 'GLU'], ['170', 'E', 'GLU'], ['171', 'E', 'ASN'], ['173', 'E', 'LEU'], ['183', 'E', 'THR'], ['327', 'E', 'PHE']]} >>> results.frequencies {'KAA': {'RESIDUES': {'ALA': 1, 'ASN': 1, 'GLU': 3, 'GLY': 1, 'LEU': 2, 'PHE': 1, 'THR': 2, 'TYR': 1, 'VAL': 3}, 'CLASS': {'R1': 7, 'R2': 2, 'R3': 3, 'R4': 3, 'R5': 0, 'RX': 0}}} However, users may opt to perform the **full workflow** for cavity detection with spatial (surface points, volume and area), constitutional (interface residues and their frequencies), hydropathy and depth characterization. This full workflow can be run with one command by setting some parameters of ``run_workflow`` function: >>> results = pyKVFinder.run_workflow(pdb, include_depth=True, include_hydropathy=True, hydrophobicity_scale='EisenbergWeiss') >>> results.depths array([[[0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], ..., [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.]], ..., [[0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], ..., [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.]]]) >>> results.scales array([[[0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], ..., [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.]], ..., [[0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], ..., [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.]]]) >>> results.avg_depth {'KAA': 1.35, 'KAB': 0.91, 'KAC': 0.68, 'KAD': 0.32, 'KAE': 0.99, 'KAF': 0.24, 'KAG': 0.1, 'KAH': 3.91, 'KAI': 0.0, 'KAJ': 0.96, 'KAK': 0.0, 'KAL': 1.07, 'KAM': 0.24, 'KAN': 0.0, 'KAO': 0.29, 'KAP': 0.7, 'KAQ': 0.22, 'KAR': 0.12} >>> results.max_depth {'KAA': 3.79, 'KAB': 2.68, 'KAC': 2.62, 'KAD': 0.85, 'KAE': 3.0, 'KAF': 0.85, 'KAG': 0.6, 'KAH': 10.73, 'KAI': 0.0, 'KAJ': 2.24, 'KAK': 0.0, 'KAL': 3.0, 'KAM': 1.2, 'KAN': 0.0, 'KAO': 1.04, 'KAP': 2.08, 'KAQ': 0.85, 'KAR': 0.6} >>> results.avg_hydropathy {'KAA': -0.73, 'KAB': -0.05, 'KAC': -0.07, 'KAD': -0.62, 'KAE': -0.81, 'KAF': -0.14, 'KAG': -0.33, 'KAH': -0.17, 'KAI': -0.4, 'KAJ': 0.62, 'KAK': -0.99, 'KAL': 0.36, 'KAM': -0.33, 'KAN': 0.18, 'KAO': 0.88, 'KAP': -0.96, 'KAQ': 0.48, 'KAR': 0.24, 'EisenbergWeiss': [-1.42, 2.6]} """ if verbose: print("> Loading atomic dictionary file") if vdw is not None: vdw = read_vdw(vdw) else: vdw = read_vdw(VDW) if verbose: print("> Reading PDB coordinates") if input.endswith(".pdb"): atomic = read_pdb(input, vdw, model) elif input.endswith(".xyz"): atomic = read_xyz(input, vdw) else: raise TypeError("`target` must have .pdb or .xyz extension.") if ligand: if verbose: print("> Reading ligand coordinates") if ligand.endswith(".pdb"): latomic = read_pdb(ligand, vdw) elif ligand.endswith(".xyz"): latomic = read_xyz(ligand, vdw) else: raise TypeError("`ligand` must have .pdb or .xyz extension.") else: latomic = None if verbose: print("> Calculating 3D grid dimensions") if box: # Get vertices from file vertices, atomic = get_vertices_from_file( box, atomic, step, probe_in, probe_out, nthreads ) # Set flag to boolean box = True else: # Get vertices from input vertices = get_vertices(atomic, probe_out, step) # Set flag to boolean box = False # Calculate distance between points nx, ny, nz = _get_dimensions(vertices, step) if verbose: print(f"Dimensions: (nx:{nx}, ny:{ny}, nz:{nz})") # Calculate sin and cos of angles a and b sincos = _get_sincos(vertices) if verbose: print(f"sina: {sincos[0]:.2f}\tsinb: {sincos[2]:.2f}") print(f"cosa: {sincos[1]:.2f}\tcosb: {sincos[3]:.2f}") # Cavity detection ncav, cavities = detect( atomic, vertices, step, probe_in, probe_out, removal_distance, volume_cutoff, latomic, ligand_cutoff, box, surface, nthreads, verbose, ) if ncav > 0: # Spatial characterization surface, volume, area = spatial(cavities, step, None, nthreads, verbose) # Constitutional characterization residues = constitutional( cavities, atomic, vertices, step, probe_in, ignore_backbone, None, nthreads, verbose, ) frequencies = calculate_frequencies(residues) # Depth characterization if include_depth: depths, max_depth, avg_depth = depth( cavities, step, None, nthreads, verbose ) else: depths, max_depth, avg_depth = None, None, None # Hydropathy hydrophobicity scales if include_hydropathy: scales, avg_hydropathy = hydropathy( surface, atomic, vertices, step, probe_in, hydrophobicity_scale, ignore_backbone, None, nthreads, verbose, ) else: scales, avg_hydropathy = None, None else: print("Warning: No cavities detected, returning None!") return None # Return dict results = pyKVFinderResults( cavities, surface, depths, scales, volume, area, max_depth, avg_depth, avg_hydropathy, residues, frequencies, vertices, step, input, ligand, ) return results
[docs]class Molecule(object): """A class for representing molecular structures. Parameters ---------- molecule : Union[str, pathlib.Path] A file path to the molecule in either PDB or XYZ format radii : Union[str, pathlib.Path, Dict[str, Any]], optional A file path to a van der Waals radii file or a dictionary of VDW radii, by default None. If None, apply the built-in van der Waals radii file: `vdw.dat`. model : int, optional The model number of a multi-model PDB file, by default None. If None, keep atoms from all models. nthreads : int, optional Number of threads, by default None. If None, the number of threads is `os.cpu_count() - 1`. verbose : bool, optional Print extra information to standard output, by default False. Attributes ---------- _atomic : numpy.ndarray A numpy array with atomic data (residue number, chain, residue name, atom name, xyz coordinates and radius) for each atom. _dim : tuple Grid dimensions. _grid : numpy.ndarray Molecule points in the 3D grid (grid[nx][ny][nz]). Grid array has integer labels in each position, that are: * 0: molecule points; * 1: solvent points. _molecule : Union[str, pathlib.Path] A file path to the molecule in either PDB or XYZ format. _padding : float The length to add to each direction of the 3D grid. _probe : float Spherical probe size to define the molecular surface based on a molecular representation. _radii : Dict[str, Any] A dictionary containing radii values, by default None. _representation : str, optional Molecular surface representation. Keywords options are vdW (van der Waals surface), SES (Solvent Excluded Surface) or SAS (Solvent Accessible Surface), by default SES. _rotation : numpy.ndarray A numpy.ndarray with sine and cossine of the grid rotation angles (sina, cosa, sinb, cosb). _step : float Grid spacing (A). _vertices : numpy.ndarray A numpy.ndarray or a list with xyz vertices coordinates (origin, X-axis, Y-axis, Z-axis). nthreads : int Number of threads for parallel processing. verbose : bool Whether to print extra information to standard output. Note ---- The van der Waals radii file defines the radius values for each atom by residue and when not defined, it uses a generic value based on the atom type. The function by default loads the built-in van der Waals radii file: ``vdw.dat``. See Also -------- read_vdw Example ------- The ``Molecule`` class loads the target molecular structure (ClO4) into pyKVFinder. class. >>> import os >>> from pyKVFinder import Molecule >>> pdb = os.path.join(os.path.dirname(pyKVFinder.__file__), 'data', 'tests', 'ClO4.pdb') >>> molecule = Molecule(pdb) >>> molecule >>> <pyKVFinder.main.Molecule object at 0x7f5ddacf2230> The van der Waals radii can be define by: * creating a Python dictionary: >>> # PyMOL (v2.5.0) vdW radii values >>> vdw = {'GEN': {'CL': 1.75, 'O': 1.52}} >>> molecule = Molecule(pdb, radii=vdw) >>> molecule.radii {'GEN': {'CL': 1.75, 'O': 1.52}} * specifying a *.dat* file following template of `van der Waals radii file`. >>> from pyKVFinder import read_vdw >>> # ChimeraX vdW radii values >>> with open('vdw.dat', 'w') as f: ... f.write('>GEN\\nCL\\t\\t1.98\\nO\\t\\t1.46\\n') >>> vdw = read_vdw('vdw.dat') >>> molecule = Molecule(pdb, radii=vdw) >>> molecule.radii {'GEN': {'CL': 1.98, 'O': 1.46}} """ def __init__( self, molecule: Union[str, pathlib.Path], radii: Union[str, pathlib.Path, Dict[str, Any]] = None, model: Optional[int] = None, nthreads: Optional[int] = None, verbose: bool = False, ): """Initialize the Molecule object with molecule, radii, model, nthreads and verbose. Parameters ---------- molecule : Union[str, pathlib.Path] A file path to the molecule in either PDB or XYZ format. radii : Union[str, pathlib.Path, Dict[str, Any]], optional A file path to a van der Waals radii file or a dictionary of VDW radii, by default None. If None, apply the built-in van der Waals radii file: `vdw.dat`. model : int, optional The model number of a multi-model PDB file, by default None. If None, keep atoms from all models. nthreads : int, optional Number of threads, by default None. If None, the number of threads is `os.cpu_count() - 1`. verbose : bool, optional Print extra information to standard output, by default False. Raises ------ TypeError `molecule` must be a string or a pathlib.Path. TypeError `molecule` must have .pdb or .xyz extension. TypeError `nthreads` must be a positive integer. ValueError `nthreads` must be a positive integer. """ # Attributes self._grid = None self._step = None self._padding = None self._probe = None self._representation = None self._vertices = None self._dim = None self._rotation = None self.verbose = verbose # Molecule if type(molecule) not in [str, pathlib.Path]: raise TypeError("`molecule` must be a string or a pathlib.Path.") self._molecule = os.path.realpath(molecule) # van der Waals radii if self.verbose: print("> Loading van der Waals radii") if radii is None: # default self._radii = read_vdw(VDW) elif type(radii) in [str, pathlib.Path]: # vdw file self._radii = read_vdw(radii) elif type(radii) in [dict]: # Processed dictionary self._radii = radii # Atomic information if self.verbose: print("> Reading molecule coordinates") if molecule.endswith(".pdb"): self._atomic = read_pdb(molecule, self.radii, model) elif molecule.endswith(".xyz"): self._atomic = read_xyz(molecule, self.radii) else: raise TypeError("`molecule` must have .pdb or .xyz extension.") # Number of threads if nthreads is not None: if type(nthreads) not in [int]: raise TypeError("`nthreads` must be a positive integer.") elif nthreads <= 0: raise ValueError("`nthreads` must be a positive integer.") else: self.nthreads = nthreads else: self.nthreads = os.cpu_count() - 1 @property def atomic(self) -> numpy.ndarray: """Get _atomic attribute.""" return self._atomic @property def dim(self) -> Tuple[int, int, int]: """Get _dim attribute""" return self._dim @property def grid(self) -> numpy.ndarray: """Get _grid attribute.""" return self._grid @property def molecule(self) -> Union[str, pathlib.Path]: """Get _molecule attribute.""" return self._molecule @property def nx(self) -> int: """Get grid units in X-axis.""" if self._dim is not None: return self._dim[0] @property def ny(self) -> int: """Get grid units in Y-axis.""" if self._dim is not None: return self._dim[1] @property def nz(self) -> int: """Get grid units in Z-axis.""" if self._dim is not None: return self._dim[2] @property def p1(self) -> numpy.ndarray: """Get origin of the 3D grid.""" if self._vertices is not None: return self._vertices[0] @property def p2(self) -> numpy.ndarray: """Get X-axis max of the 3D grid.""" if self._vertices is not None: return self._vertices[1] @property def p3(self) -> numpy.ndarray: """Get Y-axis max of the 3D grid.""" if self._vertices is not None: return self._vertices[2] @property def p4(self) -> numpy.ndarray: """Get Z-axis max of the 3D grid.""" if self._vertices is not None: return self._vertices[3] @property def padding(self) -> float: """Get _padding attribute.""" return self._padding @property def probe(self) -> float: """Get _probe attribute.""" return self._probe @property def radii(self) -> Dict[str, Any]: """Get _radii attribute.""" return self._radii @property def representation(self) -> str: """Get _representation attribute.""" return self._representation @property def rotation(self) -> numpy.ndarray: """Get _rotation attribute.""" return self._rotation @property def step(self) -> float: """Get _step attribute.""" if self._step is not None: return self._step @property def vertices(self) -> numpy.ndarray: """Get _vertices attribute.""" return self._vertices @property def xyzr(self) -> numpy.ndarray: """Get xyz coordinates and radius of molecule atoms.""" return self._atomic[:, 4:].astype(numpy.float64) def _set_grid(self, padding: Optional[float] = None) -> None: """Define the 3D grid for the target molecule. Parameters ---------- padding : float, optional The length to add to each direction of the 3D grid, by default None. If None, automatically define the length based on molecule coordinates, probe size, grid spacing and atom radii. Raises ------ TypeError `padding` must be a non-negative real number. ValueError `padding` must be a non-negative real number. """ # Padding if padding is not None: if type(padding) not in [int, float]: raise TypeError("`padding` must be a non-negative real number.") elif padding < 0.0: raise ValueError("`padding` must be a non-negative real number.") else: self._padding = padding else: self._padding = self._get_padding() # 3D grid if self.verbose: print("> Calculating 3D grid") self._vertices = get_vertices(self.atomic, self.padding, self.step) self._dim = _get_dimensions(self.vertices, self.step) self._rotation = _get_sincos(self.vertices) if self.verbose: print(f"p1: {self.vertices[0]}") print(f"p2: {self.vertices[1]}") print(f"p3: {self.vertices[2]}") print(f"p4: {self.vertices[3]}") print("nx: {}, ny: {}, nz: {}".format(*self.dim)) print("sina: {}, sinb: {}, cosa: {}, cosb: {}".format(*self.rotation)) def _get_padding(self) -> float: """Automatically define the padding based on molecule coordinates, probe size, grid spacing and atom radii. Returns ------- padding : float The length to add to each direction of the 3D grid. """ padding = 1.1 * self.xyzr[:, 3].max() if self.representation in ["SES", "SAS"]: padding += self._probe return float(padding.round(decimals=1))
[docs] def vdw(self, step: float = 0.6, padding: Optional[float] = None) -> None: """Fill the 3D grid with the molecule as the van der Waals surface representation. Parameters ---------- step : float, optional Grid spacing (A), by default 0.6. padding : float, optional The length to add to each direction of the 3D grid, by default None. If None, automatically define the length based on molecule coordinates, probe size, grid spacing and atom radii. Raises ------ TypeError `step` must be a positive real number. ValueError `step` must be a positive real number. Example ------- The ``Molecule.vdw()`` method takes a grid spacing and returns a NumPy array with the molecule points representing the vdW surface in the 3D grid. >>> # Grid Spacing (step): 0.1 >>> step = 0.1 >>> molecule.vdw(step=step) >>> molecule.grid array([[[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], ..., [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]], ..., [[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], ..., [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]]], dtype=int32) """ from _pyKVFinder import _fill_receptor # Check arguments if type(step) not in [int, float]: raise TypeError("`step` must be a postive real number.") elif step <= 0.0: raise ValueError("`step` must be a positive real number.") else: self._step = step # Attributes self._representation = "vdW" self._probe = None # Define 3D grid self._set_grid(padding) # van der Waals atoms (hard sphere model) to grid if self.verbose: print("> Inserting atoms with van der Waals radii into 3D grid") self._grid = _fill_receptor( self.nx * self.ny * self.nz, self.nx, self.ny, self.nz, self.xyzr, self.p1, self.rotation, self.step, 0.0, False, self.nthreads, self.verbose, ).reshape(self.nx, self.ny, self.nz)
[docs] def surface( self, step: float = 0.6, probe: float = 1.4, surface: str = "SES", padding: Optional[float] = None, ) -> None: """Fill the 3D grid with the molecule as the van der Waals surface representation. Parameters ---------- step : float, optional Grid spacing (A), by default 0.6. probe : float, optional Spherical probe size to define the molecular surface based on a molecular representation, by default 1.4. surface : str, optional Molecular surface representation. Keywords options are vdW (van der Waals surface), SES (Solvent Excluded Surface) or SAS (Solvent Accessible Surface), by default "SES". padding : float, optional The length to add to each direction of the 3D grid, by default None. If None, automatically define the length based on molecule coordinates, probe size, grid spacing and atom radii. Raises ------ TypeError `step` must be a positive real number. ValueError `step` must be a positive real number. TypeError `probe_out` must be a positive real number. ValueError `probe_out` must be a positive real number. Example ------- The ``Molecule.surface()`` method takes the grid spacing, the spherical probe size to model the surface, the surface representation and returns a NumPy array with the molecule points representing the SES in the 3D grid. The molecular surface can be modelled as: * Solvent Excluded Surface (SES): >>> # Surface Representation: SES >>> surface = 'SES' >>> # Grid Spacing (step): 0.1 >>> step = 0.1 >>> # Spherical Probe (probe): 1.4 >>> probe = 1.4 >>> molecule.surface(step=step, probe=probe, surface=surface) >>> molecule.grid array([[[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], ..., [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]], ..., [[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], ..., [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]]], dtype=int32) The molecular surface can be modelled as: * Solvent Accessible Surface (SAS): >>> # Surface Representation: SAS >>> surface = 'SAS' >>> # Grid Spacing (step): 0.1 >>> step = 0.1 >>> # Spherical Probe (probe): 1.4 >>> probe = 1.4 >>> molecule.surface(step=step, probe=probe, surface=surface) >>> molecule.grid array([[[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], ..., [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]], ..., [[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], ..., [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]]], dtype=int32) """ from _pyKVFinder import _fill_receptor # Check arguments if type(step) not in [int, float]: raise TypeError("`step` must be a postive real number.") elif step <= 0.0: raise ValueError("`step` must be a positive real number.") else: self._step = step # Probe if type(probe) not in [int, float, numpy.float64]: raise TypeError("`probe_out` must be a non-negative real number.") elif probe <= 0.0: raise ValueError("`probe_out` must be a non-negative real number.") self._probe = probe # Surface if surface == "SES": if self.verbose: print("> Surface representation: Solvent Excluded Surface (SES).") self._representation = surface surface = True elif surface == "SAS": if self.verbose: print("> Surface representation: Solvent Accessible Surface (SAS).") self._representation = surface surface = False else: raise ValueError(f"`surface` must be SAS or SES, not {surface}.") # Define 3D grid self._set_grid(padding) # Molecular surface (SES or SAS) to grid self._grid = _fill_receptor( self.nx * self.ny * self.nz, self.nx, self.ny, self.nz, self.xyzr, self.p1, self.rotation, self.step, self.probe, surface, self.nthreads, self.verbose, ).reshape(self.nx, self.ny, self.nz)
[docs] def volume(self) -> float: """Estimate the volume of the molecule based on the molecular surface representation, ie, vdW, SES or SAS representations. Returns ------- volume : float Molecular volume (A³). Example ------- With the molecular surface modelled by ``Molecule.vdw()`` or ``Molecule.surface()``, the volume can be estimated by running: >>> molecule.volume() 90.8 """ from _pyKVFinder import _volume if self.grid is not None: volume = _volume( (self.grid == 0).astype(numpy.int32) * 2, self.step, 1, self.nthreads ) return volume.round(decimals=2).item()
[docs] def preview(self, **kwargs) -> None: """Preview the molecular surface in the 3D grid. Example ------- With the molecular surface modelled by ``Molecule.vdw()`` or ``Molecule.surface()``, the modelled molecule in the 3D grid can be previewed by running: >>> molecule.preview() """ if self.grid is not None: from plotly.express import scatter_3d x, y, z = numpy.nonzero(self.grid == 0) fig = scatter_3d(x=x, y=y, z=z, **kwargs) fig.update_layout( scene_xaxis_showticklabels=False, scene_yaxis_showticklabels=False, scene_zaxis_showticklabels=False, ) fig.show()
[docs] def export( self, fn: Union[str, pathlib.Path] = "molecule.pdb", ) -> None: """Export molecule points (H) to a PDB-formatted file. Parameters ---------- fn : Union[str, pathlib.Path], optional A file path to the molecular volume in the grid-based rerpesentation in PDB format, by default "molecule.pdb". Raises ------ TypeError `fn` must be a string or a pathlib.Path. Example ------- With the molecular surface modelled by ``Molecule.vdw()`` or ``Molecule.surface()``, the modelled molecule in the 3D grid can be exported to a PDB-formatted file by running: >>> molecule.export('model.pdb') """ # Filename (fn) if type(fn) not in [str, pathlib.Path]: raise TypeError("`fn` must be a string or a pathlib.Path.") os.makedirs(os.path.abspath(os.path.dirname(fn)), exist_ok=True) # Save grid to PDB file export( fn, (self.grid == 0).astype(numpy.int32) * 2, None, self.vertices, self.step )