Source code for pyKVFinder.utils

import os
import logging
import argparse
import pathlib
import numpy
from typing import Dict, List, Union, Optional

__all__ = [
    "read_vdw",
    "read_pdb",
    "read_xyz",
    "read_cavity",
    "calculate_frequencies",
    "plot_frequencies",
    "write_results",
]

VDW = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data/vdw.dat")


[docs]def read_vdw(
    fn: Optional[Union[str, pathlib.Path]] = None
) -> Dict[str, Dict[str, float]]:
    """Reads van der Waals radii from .dat file.

    Parameters
    ----------
    fn : Optional[Union[str, pathlib.Path]], optional
        A path to a van der Waals radii file, by default None. If None, apply the built-in van der
        Waals radii file: `vdw.dat`.

    Returns
    -------
    vdw : Dict[str, Dict[str, float]]
        A dictionary containing radii values.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.
    ValueError
        A line in `vdw` has incorrect format. The values must be double
        tab-separated.
    ValueError
        A line in `vdw` has an incorrect radius type for an atom.

    Note
    ----
    The van der Waals radii file defines the radius values for each
    atom by residue and when not defined, it uses a generic value
    based on the atom type (see `van der Waals file template`).
    The package contains a built-in van der Waals radii file: `vdw.dat`.
    """
    # Check argument
    if fn is not None:
        if type(fn) not in [str, pathlib.Path]:
            raise TypeError("`fn` must be a string or a pathlib.Path.")
    else:
        # Define default vdw file
        fn = VDW

    # Create vdw dictionary
    vdw = {}

    # Open fn
    with open(fn, "r") as f:
        # Read line with data only (ignore empty lines)
        lines = [
            line.replace(" ", "")
            for line in f.read().splitlines()
            if line.replace("\t\t", "")
        ]
        for line in lines:
            if not line.startswith("#"):
                if line.startswith(">"):
                    res = line.replace(">", "").replace("\t\t", "").replace(" ", "")
                    vdw[res] = {}
                else:
                    try:
                        atom, radius = line.split("\t\t")
                    except ValueError:
                        if len(line.split("\t\t")) != 2:
                            raise ValueError(
                                "A line in `vdw` has incorrect format. \
The values must be double tab-separated."
                            )
                    try:
                        vdw[res][atom] = float(radius)
                    except ValueError:
                        raise ValueError(
                            "A line in `vdw` has an incorrect radius type for \
an atom."
                        )

    return vdw


def _process_pdb_line(
    line: str, vdw: Dict[str, Dict[str, float]]
) -> List[Union[str, float, int]]:
    """Extracts ATOM and HETATM information of PDB line.

    Parameters
    ----------
    line : str
        A line of a valid PDB file
    vdw : Dict[str, Dict[str, Dict[str, float]]]
        A dictionary containing radii values.

    Returns
    -------
    atomic : List[Union[str, float, int]]
        A list with resnum, chain, resname, atom name, xyz coordinates and radius.
    """
    # Get PDB infomation
    atom = line[12:16].strip()
    resname = line[17:20].strip()
    resnum = int(line[22:26])
    chain = line[21]
    x = float(line[30:38])
    y = float(line[38:46])
    z = float(line[46:54])
    atom_symbol = line[76:78].strip().upper()

    # Get atom and radius from vdw
    if resname in vdw.keys() and atom in vdw[resname].keys():
        radius = vdw[resname][atom]
    else:
        radius = vdw["GEN"][atom_symbol]
        logging.info(
            f"Warning: Atom {atom} of residue {resname} \
not found in dictionary."
        )
        logging.info(
            f"Warning: Using generic atom {atom_symbol} \
radius: {radius} \u00c5."
        )

    # Prepare output
    atomic = [resnum, chain, resname, atom, x, y, z, radius]

    return atomic


def read_pdb(
    fn: Union[str, pathlib.Path],
    vdw: Optional[Dict[str, Dict[str, float]]] = None,
    model: Optional[int] = None,
) -> numpy.ndarray:
    """Reads PDB file into numpy.ndarrays.

    Parameters
    ----------
    fn : Union[str, pathlib.Path]
        A path to PDB file.
    vdw : Dict[str, Dict[str, float]], optional
        A dictionary containing radii values, by default None. If None, use output of `pyKVFinder.read_vdw()`.
    model : int, optional
        Model number, by default None. If None, keep atoms from all models.

    Returns
    -------
    atomic : numpy.ndarray
        A numpy array with atomic data (residue number, chain, residue name, atom name, xyz coordinates
        and radius) for each atom.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.

    Note
    ----
    The van der Waals radii file defines the radius values for each atom
    by residue and when not defined, it uses a generic value based on the
    atom type. The function by default loads the built-in van der Waals radii
    file: `vdw.dat`.
    """
    # Check arguments
    if type(fn) not in [str, pathlib.Path]:
        raise TypeError("`fn` must be a string or a pathlib.Path.")
    if model is not None:
        if type(model) not in [int]:
            raise TypeError("`model` must be an integer.")

    # Define default vdw file
    if vdw is None:
        vdw = read_vdw(VDW)

    # Create lists
    atomic = []

    # Keep all models
    keep = True if model is None else False

    # Read file and process atoms
    with open(fn, "r") as f:
        for line in f.readlines():
            if model is not None:
                if line[:5] == "MODEL":
                    nmodel = int(line[5:].replace(" ", "").rstrip("\n"))
                    keep = True if model == nmodel else False
            if keep:
                if line[:4] == "ATOM" or line[:6] == "HETATM":
                    atomic.append(_process_pdb_line(line, vdw))

    return numpy.asarray(atomic)


def read_xyz(
    fn: Union[str, pathlib.Path], vdw: Optional[Dict[str, Dict[str, float]]] = None
) -> numpy.ndarray:
    """Reads XYZ file into numpy.ndarrays.

    Parameters
    ----------
    fn : Union[str, pathlib.Path]
        A path to XYZ file.
    vdw : Dict[str, Dict[str, float]], optional
        A dictionary containing radii values, by default None. If None, use output of `pyKVFinder.read_vdw()`.

    Returns
    -------
    atomic : numpy.ndarray
        A numpy array with atomic data (residue number, chain, residue name, atom name, xyz coordinates
        and radius) for each atom.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.

    Note
    ----
    The van der Waals radii file defines the radius values for each atom
    by residue and when not defined, it uses a generic value based on the
    atom type. The function by default loads the built-in van der Waals radii
    file: `vdw.dat`.
    """
    # Check arguments
    if type(fn) not in [str, pathlib.Path]:
        raise TypeError("`fn` must be a string or a pathlib.Path.")

    # Define default vdw file
    if vdw is None:
        vdw = read_vdw(VDW)

    # Create lists
    atomic = []

    # Start resnum
    resnum = 0

    # Read XYZ file
    with open(fn, "r") as f:
        for line in f.readlines():
            line = line.split()
            if len(line) == 4:
                # Get PDB information
                atom_symbol = line[0].upper()
                x = float(line[1])
                y = float(line[2])
                z = float(line[3])

                # Get radius (generic value)
                radius = vdw["GEN"][atom_symbol]

                # Get resnum
                resnum += 1

                # Append data
                atomic.append([resnum, "A", "UNK", atom_symbol, x, y, z, radius])

    return numpy.asarray(atomic)


def _read_cavity(cavity: Union[str, pathlib.Path]) -> numpy.ndarray:
    """Reads xyz coordinates and labels of a cavities file into numpy.ndarray.

    Parameters
    ----------
    cavity : Union[str, pathlib.Path]
        A path to a PDB-formatted file of cavities.

    Returns
    -------
    xyzl : numpy.ndarray
        A numpy.ndarray with xyz coordinates and cavity label for each cavity point.
    """
    from .grid import _get_cavity_label

    # Create xyzl (xyz coordinates and cavity label)
    xyzl = []

    # Read cavity file into list
    with open(cavity, "r") as f:
        for line in f.readlines():
            if line[:4] == "ATOM" or line[:6] == "HETATM":
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
                label = _get_cavity_label(line[17:20].strip())
                xyzl.append([x, y, z, label])

    return numpy.asarray(xyzl)


def read_cavity(
    cavity: Union[str, pathlib.Path],
    receptor: Union[str, pathlib.Path],
    step: Union[float, int] = 0.6,
    probe_in: Union[float, int] = 1.4,
    probe_out: Union[float, int] = 4.0,
    surface: str = "SES",
    vdw: Optional[Dict[str, Dict[str, float]]] = None,
    nthreads: Optional[int] = None,
    verbose: bool = False,
) -> numpy.ndarray:
    """Read cavities and receptor inside a 3D grid.

    Parameters
    ----------
    cavity : Union[str, pathlib.Path]
        A path to a PDB file of cavities.
    receptor : Union[str, pathlib.Path]
        A path to a PDB or XYZ file of the receptor.
    step : Union[float, int], optional
        Grid spacing (A), by default 0.6.
    probe_in : Union[float, int], optional
        Probe In size (A), by default 1.4.
    probe_out : Union[float, int], optional
        Probe Out size (A), by default 4.0.
    surface : str, optional
        Surface representation. Keywords options are SES (Solvent Excluded Surface) or SAS (Solvent
        Accessible Surface), by default "SES".
    vdw : Dict[str, Dict[str, float]], optional
        A dictionary containing radii values, by default None. If None, use output of `pyKVFinder.read_vdw()`.
    nthreads : Optional[int], optional
        Number of threads, by default None. If None, the number of threads is
        `os.cpu_count() - 1`.
    verbose : bool, optional
        Print extra information to standard output, by default False.

    Returns
    -------
    grid : numpy.ndarray
        Cavity and receptor points in the 3D grid (grid[nx][ny][nz]).
        Grid array has integer labels in each position, that are:

            * -1: bulk points or empty space points;

            * 0: biomolecule points;

            * >=2: cavity points.

    Raises
    ------
    TypeError
        `cavity` must be a string or a pathlib.Path.
    TypeError
        `receptor` must be a string or a pathlib.Path.
    TypeError
        `target` must have .pdb or .xyz extension.
    TypeError
        `step` must be a positive real number.
    ValueError
        `step` must be a positive real number.
    TypeError
        `probe_in` must be a non-negative real number.
    ValueError
        `probe_in` must be a non-negative real number.
    TypeError
        `probe_out` must be a non-negative real number.
    ValueError
        `probe_out` must be a non-negative real number.
    ValueError
        `probe_out` must be greater than `probe_in`.
    TypeError
        `surface` must be a str.
    TypeError
        `nthreads` must be a positive integer.
    ValueError
        `nthreads` must be a positive integer.
    TypeError
        `verbose` must be a boolean.
    ValueError
        `surface` must be SAS or SES, not {surface}.
    """
    from .grid import get_vertices, _get_sincos, _get_dimensions
    from _pyKVFinder import _fill_receptor, _fill_cavity

    # Check arguments
    if type(cavity) not in [str, pathlib.Path]:
        raise TypeError("`cavity` must be a string or a pathlib.Path.")
    if type(receptor) not in [str, pathlib.Path]:
        raise TypeError("`receptor` must be a string or a pathlib.Path.")
    elif not receptor.endswith(".pdb") and not receptor.endswith(".xyz"):
        raise TypeError("`receptor` must have .pdb or .xyz extension.")
    if type(step) not in [float, int]:
        raise TypeError("`step` must be a positive real number.")
    elif step <= 0.0:
        raise ValueError("`step` must be a positive real number.")
    if type(probe_in) not in [float, int]:
        raise TypeError("`probe_in` must be a non-negative real number.")
    elif probe_in < 0.0:
        raise ValueError("`probe_in` must be a non-negative real number.")
    if type(probe_out) not in [float, int]:
        raise TypeError("`probe_out` must be a non-negative real number.")
    elif probe_out < 0.0:
        raise ValueError("`probe_out` must be a non-negative real number.")
    elif probe_out < probe_in:
        raise ValueError("`probe_out` must be greater than `probe_in`.")
    if type(surface) not in [str]:
        raise TypeError("`surface` must be a str.")
    if nthreads is None:
        nthreads = os.cpu_count() - 1
    else:
        if type(nthreads) not in [int]:
            raise TypeError("`nthreads` must be a positive integer.")
        elif nthreads <= 0:
            raise ValueError("`nthreads` must be a positive integer.")
    if type(verbose) not in [bool]:
        raise TypeError("`verbose` must be a boolean.")

    # Convert types
    if type(step) == int:
        step = float(step)
    if type(probe_in) == int:
        probe_in = float(probe_in)
    if type(probe_out) == int:
        probe_out = float(probe_out)

    # Insert receptor inside 3D grid
    if verbose:
        print(f"> Inserting {receptor} into 3D grid")

    # Define default vdw file
    if vdw is None:
        vdw = read_vdw(VDW)

    # Load receptor coordinates and radii
    if receptor.endswith(".pdb"):
        atomic = read_pdb(receptor, vdw)
    elif receptor.endswith(".xyz"):
        atomic = read_xyz(receptor, vdw)

    # Extract xyzr from atomic
    xyzr = atomic[:, 4:].astype(numpy.float64)

    # Get vertices
    vertices = get_vertices(atomic, probe_out, step)

    # Get sincos
    sincos = _get_sincos(vertices)

    # Get dimensions
    nx, ny, nz = _get_dimensions(vertices, step)

    # Unpack vertices
    P1, P2, P3, P4 = vertices

    # Calculate number of voxels
    nvoxels = nx * ny * nz

    if surface == "SES":
        if verbose:
            print("> Surface representation: Solvent Excluded Surface (SES).")
        surface = True
    elif surface == "SAS":
        if verbose:
            print("> Surface representation: Solvent Accessible Surface (SAS).")
        surface = False
    else:
        raise ValueError(f"`surface` must be SAS or SES, not {surface}.")

    # Fill grid with receptor
    grid = _fill_receptor(
        nvoxels,
        nx,
        ny,
        nz,
        xyzr,
        P1,
        sincos,
        step,
        probe_in,
        surface,
        nthreads,
        verbose,
    ).reshape(nx, ny, nz)

    # Insert cavities inside 3D grid
    if verbose:
        print(f"> Inserting {cavity} into 3D grid")

    # Load cavities coordinates and labels
    xyzl = _read_cavity(cavity)

    # Fill grid with cavities
    _fill_cavity(grid, xyzl, P1, sincos, step, nthreads)

    return grid


def _process_box(args: argparse.Namespace) -> Dict[str, List[float]]:
    """Gets xyz coordinates of 3D grid vertices.

    Parameters
    ----------
    args (argparse.Namespace)
        Arguments passes by argparser CLI.

    Returns
    -------
    box : Dict[str, List[float]]
        A dictionary with a xyz coordinates (p1: origin,
        p2: X-axis, p3: Y-axis, p4: Z-axis) for each point.
    """
    # Create box parameter
    box = {
        "p1": args.vertices[0],
        "p2": args.vertices[1],
        "p3": args.vertices[2],
        "p4": args.vertices[3],
    }

    # Adjust if box adjustment mode
    if args.box:
        # Get probe out additions
        # p1 = (x1, y1, z1)
        x1 = (
            -(args.probe_out * args.sincos[3])
            - (args.probe_out * args.sincos[0] * args.sincos[2])
            + (args.probe_out * args.sincos[1] * args.sincos[2])
        )
        y1 = -(args.probe_out * args.sincos[1]) - (args.probe_out * args.sincos[0])
        z1 = (
            -(args.probe_out * args.sincos[2])
            + (args.probe_out * args.sincos[0] * args.sincos[3])
            - (args.probe_out * args.sincos[1] * args.sincos[3])
        )
        # p2 = (x2, y2, z2)
        x2 = (
            (args.probe_out * args.sincos[3])
            - (args.probe_out * args.sincos[0] * args.sincos[2])
            + (args.probe_out * args.sincos[1] * args.sincos[2])
        )
        y2 = -(args.probe_out * args.sincos[1]) - (args.probe_out * args.sincos[0])
        z2 = (
            (args.probe_out * args.sincos[2])
            + (args.probe_out * args.sincos[0] * args.sincos[3])
            - (args.probe_out * args.sincos[1] * args.sincos[3])
        )
        # p3 = (x3, y3, z3)
        x3 = (
            -(args.probe_out * args.sincos[3])
            + (args.probe_out * args.sincos[0] * args.sincos[2])
            + (args.probe_out * args.sincos[1] * args.sincos[2])
        )
        y3 = (args.probe_out * args.sincos[1]) - (args.probe_out * args.sincos[0])
        z3 = (
            -(args.probe_out * args.sincos[2])
            - (args.probe_out * args.sincos[0] * args.sincos[3])
            - (args.probe_out * args.sincos[1] * args.sincos[3])
        )
        # p4 = (x4, y4, z4)
        x4 = (
            -(args.probe_out * args.sincos[3])
            - (args.probe_out * args.sincos[0] * args.sincos[2])
            - (args.probe_out * args.sincos[1] * args.sincos[2])
        )
        y4 = -(args.probe_out * args.sincos[1]) + (args.probe_out * args.sincos[0])
        z4 = (
            -(args.probe_out * args.sincos[2])
            + (args.probe_out * args.sincos[0] * args.sincos[3])
            + (args.probe_out * args.sincos[1] * args.sincos[3])
        )

        # Remove probe out addition
        box["p1"] -= numpy.array([x1, y1, z1])
        box["p2"] -= numpy.array([x2, y2, z2])
        box["p3"] -= numpy.array([x3, y3, z3])
        box["p4"] -= numpy.array([x4, y4, z4])

    # Prepare to dict to toml module
    box["p1"] = numpy.around(box["p1"], 2).tolist()
    box["p2"] = numpy.around(box["p2"], 2).tolist()
    box["p3"] = numpy.around(box["p3"], 2).tolist()
    box["p4"] = numpy.around(box["p4"], 2).tolist()

    return box


def _write_parameters(args: argparse.Namespace) -> None:
    """Writes parameters used in cavity detection and characterization of
    pyKVFinder to TOML-formatted file.

    Parameters
    ----------
    args : argparse.Namespace
        Arguments passes by argparser CLI.
    """
    import toml

    # Parameters filename
    fn = os.path.join(args.output_directory, f"{args.base_name}.parameters.toml")

    # Parameters dict
    parameters = {
        "FILES": {
            "INPUT": args.input,
            "LIGAND": args.ligand,
            "BASE_NAME": args.base_name,
            "OUTPUT_DIRECTORY": args.output_directory,
            "DICTIONARY": args.dictionary,
        },
        "SETTINGS": {
            "MODES": {
                "BOX_ADJUSTMENT": args.box,
                "LIGAND_ADJUSTMENT": True if args.ligand else False,
                "DEPTH": args.depth,
                "SURFACE": args.surface,
                "IGNORE_BACKBONE": args.ignore_backbone,
            },
            "STEP": args.step,
            "PROBES": {
                "PROBE_IN": args.probe_in,
                "PROBE_OUT": args.probe_out,
            },
            "CUTOFFS": {
                "VOLUME_CUTOFF": args.volume_cutoff,
                "LIGAND_CUTOFF": args.ligand_cutoff,
                "REMOVAL_DISTANCE": args.removal_distance,
            },
            "BOX": _process_box(args),
        },
    }

    # Write to TOML file
    with open(fn, "w") as param:
        toml.dump(parameters, param)


def calculate_frequencies(
    residues: Dict[str, List[List[str]]]
) -> Dict[str, Dict[str, Dict[str, int]]]:
    """Calculate frequencies of residues and class of residues
    (R1, R2, R3, R4 and R5) for detected cavities.

    Parameters
    ----------
    residues : Dict[str, List[List[str]]]
        A dictionary with a list of interface residues for each detected
        cavity.

    Returns
    -------
    frequencies : Dict[str, Dict[str, Dict[str, int]]]
        A dictionary with frequencies of residues and class for
        residues of each detected cavity.

    Note
    ----
    The cavity nomenclature is based on the integer label. The cavity
    marked with 2, the first integer corresponding to a cavity, is KAA, the
    cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on.

    Note
    ----
    The classes of residues are:

    * Aliphatic apolar (R1): Alanine, Glycine, Isoleucine, Leucine, Methionine, Valine.

    * Aromatic (R2): Phenylalanine, Tryptophan, Tyrosine.

    * Polar Uncharged (R3): Asparagine, Cysteine, Glutamine, Proline, Serine, Threonine.

    * Negatively charged (R4): Aspartate, Glutamate.

    * Positively charged (R5): Arginine, Histidine, Lysine.

    * Non-standard (RX): Non-standard residues.
    """
    # Create a dict for frequencies
    frequencies = {}

    # Get cavity name and residues list for each detected cavity
    for name, reslist in residues.items():
        # Create a dict for cavity name
        frequencies[name] = {
            "RESIDUES": {},
            "CLASS": {},
        }
        # Get unique residues names
        residues = [res[2] for res in reslist]
        reslist = sorted(list(set(residues)))

        # Get residues frequencies
        for res in reslist:
            frequencies[name]["RESIDUES"][res] = residues.count(res)

        # Get class frequencies
        frequencies[name]["CLASS"]["R1"] = (
            frequencies[name]["RESIDUES"].get("ALA", 0)
            + frequencies[name]["RESIDUES"].get("GLY", 0)
            + frequencies[name]["RESIDUES"].get("ILE", 0)
            + frequencies[name]["RESIDUES"].get("LEU", 0)
            + frequencies[name]["RESIDUES"].get("PRO", 0)
            + frequencies[name]["RESIDUES"].get("VAL", 0)
        )
        frequencies[name]["CLASS"]["R2"] = (
            frequencies[name]["RESIDUES"].get("PHE", 0)
            + frequencies[name]["RESIDUES"].get("TRP", 0)
            + frequencies[name]["RESIDUES"].get("TYR", 0)
        )
        frequencies[name]["CLASS"]["R3"] = (
            frequencies[name]["RESIDUES"].get("ASN", 0)
            + frequencies[name]["RESIDUES"].get("CYS", 0)
            + frequencies[name]["RESIDUES"].get("GLN", 0)
            + frequencies[name]["RESIDUES"].get("MET", 0)
            + frequencies[name]["RESIDUES"].get("SER", 0)
            + frequencies[name]["RESIDUES"].get("THR", 0)
        )
        frequencies[name]["CLASS"]["R4"] = frequencies[name]["RESIDUES"].get(
            "ASP", 0
        ) + frequencies[name]["RESIDUES"].get("GLU", 0)
        frequencies[name]["CLASS"]["R5"] = (
            frequencies[name]["RESIDUES"].get("ARG", 0)
            + frequencies[name]["RESIDUES"].get("HIS", 0)
            + frequencies[name]["RESIDUES"].get("LYS", 0)
        )
        frequencies[name]["CLASS"]["RX"] = len(residues) - sum(
            frequencies[name]["CLASS"].values()
        )

    return frequencies


def plot_frequencies(
    frequencies: Dict[str, Dict[str, Dict[str, int]]],
    fn: Union[str, pathlib.Path] = "barplots.pdf",
) -> None:
    """Plot bar charts of calculated frequencies (residues and classes of
    residues) for each detected cavity in a target PDF file.

    Parameters
    ----------
    frequencies : Dict[str, Dict[str, Dict[str, int]]]
        A dictionary with frequencies of residues and class for
        residues of each detected cavity.
    fn : Union[str, pathlib.Path], optional
        A path to PDF file for plotting bar charts of frequencies, by
        default `barplots.pdf`.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.

    Note
    ----
    The cavity nomenclature is based on the integer label. The cavity
    marked with 2, the first integer corresponding to a cavity, is KAA, the
    cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on.

    Note
    ----
    The classes of residues are:

    * Aliphatic apolar (R1): Alanine, Glycine, Isoleucine, Leucine, Methionine, Valine.

    * Aromatic (R2): Phenylalanine, Tryptophan, Tyrosine.

    * Polar Uncharged (R3): Asparagine, Cysteine, Glutamine, Proline, Serine, Threonine.

    * Negatively charged (R4): Aspartate, Glutamate.

    * Positively charged (R5): Arginine, Histidine, Lysine.

    * Non-standard (RX): Non-standard residues.
    """
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    # Check arguments
    if type(fn) not in [str, pathlib.Path]:
        raise TypeError("`fn` must be a string or a pathlib.Path.")

    # Create base directories of output PDF file
    os.makedirs(os.path.abspath(os.path.dirname(fn)), exist_ok=True)

    # Create a dictionary for standard amino acids
    tmp = {
        "ALA": 0,
        "ARG": 0,
        "ASN": 0,
        "ASP": 0,
        "CYS": 0,
        "GLN": 0,
        "GLU": 0,
        "GLY": 0,
        "HIS": 0,
        "ILE": 0,
        "LEU": 0,
        "LYS": 0,
        "MET": 0,
        "PHE": 0,
        "PRO": 0,
        "SER": 0,
        "THR": 0,
        "TRP": 0,
        "TYR": 0,
        "VAL": 0,
    }

    with PdfPages(fn) as pdf:
        # Standardize data
        ymax = 0
        for cavity_tag in frequencies.keys():
            # Include missing residues
            frequencies[cavity_tag]["RESIDUES"] = {
                **tmp,
                **frequencies[cavity_tag]["RESIDUES"],
            }
            # Get y maximum
            if ymax < max(frequencies[cavity_tag]["CLASS"].values()):
                ymax = max(frequencies[cavity_tag]["CLASS"].values())
        ymax += 1

        # Pdf plots
        for cavity_tag in frequencies.keys():
            # Create page
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 9), dpi=300)
            fig.suptitle(r"Cavity " + f"{cavity_tag}", fontsize=30)

            # Frequency residues
            x = list(frequencies[cavity_tag]["RESIDUES"].keys())
            y = frequencies[cavity_tag]["RESIDUES"].values()
            colors = [
                "tab:cyan",
                "tab:purple",
                "tab:green",
                "tab:red",
                "tab:green",
                "tab:green",
                "tab:red",
                "tab:cyan",
                "tab:purple",
                "tab:cyan",
                "tab:cyan",
                "tab:purple",
                "tab:green",
                "tab:orange",
                "tab:cyan",
                "tab:green",
                "tab:green",
                "tab:orange",
                "tab:orange",
                "tab:cyan",
            ]
            for _ in range(len(x) - len(colors)):
                colors.append("tab:gray")

            ax1.bar(x, y, align="center", edgecolor="black", color=colors)
            ax1.set_xlabel(None)
            ax1.set_xlim(-1, len(x))
            ax1.tick_params(axis="x", labelsize=15, rotation=45)
            ax1.tick_params(axis="y", labelsize=20)
            ax1.set_ylabel(r"Frequency", fontsize=20)
            ax1.set_ylim(0, ymax)
            ax1.grid(which="major", axis="y", linestyle="--")

            # Frequency classes
            x = list(frequencies[cavity_tag]["CLASS"].keys())
            y = frequencies[cavity_tag]["CLASS"].values()
            colors = [
                "tab:cyan",
                "tab:orange",
                "tab:green",
                "tab:red",
                "tab:purple",
                "tab:gray",
            ]

            ax2.bar(x=x, height=y, align="center", edgecolor="black", color=colors)
            ax2.set_xlabel(None)
            ax2.set_xlim(-1, len(x))
            ax2.tick_params(axis="x", labelsize=20)
            ax2.tick_params(axis="y", labelsize=20)
            ax2.set_ylabel(None)
            ax2.set_ylim(0, ymax)
            ax2.grid(which="major", axis="y", linestyle="--")

            # Legend
            labels = [
                r"Aliphatic apolar",
                r"Aromatic",
                r"Polar uncharged",
                r"Negatively charged",
                r"Positively charged",
                r"Non-standard",
            ]
            handles = [
                plt.Rectangle((0, 0), 1, 1, facecolor=colors[label], edgecolor="black")
                for label in range(len(labels))
            ]
            fig.legend(
                handles,
                labels,
                fontsize=15,
                fancybox=True,
                shadow=True,
                loc="lower center",
                ncol=6,
            )

            # Adjust plots
            fig.tight_layout()
            fig.subplots_adjust(bottom=0.12)

            # Save page
            pdf.savefig()
            plt.close()


def write_results(
    fn: Union[str, pathlib.Path],
    input: Optional[Union[str, pathlib.Path]],
    ligand: Optional[Union[str, pathlib.Path]],
    output: Optional[Union[str, pathlib.Path]],
    output_hydropathy: Optional[Union[str, pathlib.Path]] = None,
    volume: Optional[Dict[str, float]] = None,
    area: Optional[Dict[str, float]] = None,
    max_depth: Optional[Dict[str, float]] = None,
    avg_depth: Optional[Dict[str, float]] = None,
    avg_hydropathy: Optional[Dict[str, float]] = None,
    residues: Optional[Dict[str, List[List[str]]]] = None,
    frequencies: Optional[Dict[str, Dict[str, Dict[str, int]]]] = None,
    step: Union[float, int] = 0.6,
) -> None:
    """Writes file paths and cavity characterization to TOML-formatted file.

    Parameters
    ----------
    fn : Union[str, pathlib.Path]
        A path to TOML-formatted file for writing file paths and
        cavity characterization (volume, area, depth [optional] and interface
        residues) per cavity detected.
    input : Union[str, pathlib.Path], optional
        A path to input PDB or XYZ file.
    ligand : Union[str, pathlib.Path], optional
        A path to ligand PDB or XYZ file.
    output : Union[str, pathlib.Path], optional
        A path to cavity PDB file.
    output_hydropathy : Union[str, pathlib.Path], optional
        A path to hydropathy PDB file (surface points mapped with a
        hydrophobicity scale), by default None.
    volume : Dict[str, float], optional
        A dictionary with volume of each detected cavity, by default None.
    area : Dict[str, float], optional
        A dictionary with area of each detected cavity, by default None.
    max_depth : Dict[str, float], optional
        A dictionary with maximum depth of each detected cavity, by default
        None.
    avg_depth : Dict[str, float], optional
        A dictionary with average depth of each detected cavity, by default
        None.
    avg_hydropapthy : Dict[str, float], optional
        A dictionary with average hydropathy of each detected cavity and range
        of the hydrophobicity scale mapped, by default None.
    residues : Dict[str, List[List[str]]], optional
        A dictionary with interface residues of each detected cavity, by
        default None.
    frequencies : Dict[str, Dict[str, Dict[str, int]]], optional
        A dictionary with frequencies of interface residues and classes of
        residues of each detected cavity, by default None.
    step : Union[float, int], optional
        Grid spacing (A), by default 0.6.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.
    TypeError
        `input` must be a string or a pathlib.Path.
    TypeError
        `ligand` must be a string or a pathlib.Path.
    TypeError
        `output` must be a string or a pathlib.Path.
    TypeError
        `output_hydropathy` must be a string or a pathlib.Path.
    TypeError
        `volume` must be a dictionary.
    TypeError
        `area` must be a dictionary.
    TypeError
        `max_depth` must be a dictionary.
    TypeError
        `avg_depth` must be a dictionary.
    TypeError
        `avg_hydropathy` must be a dictionary.
    TypeError
        `residues` must be a dictionary.
    TypeError
        `frequencies` must be a dictionary.
    TypeError
        `step` must be a positive real number.
    ValueError
        `step` must be a positive real number.

    Note
    ----
    The cavity nomenclature is based on the integer label. The cavity
    marked with 2, the first integer corresponding to a cavity, is KAA, the
    cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on.
    """
    import toml

    # Check arguments
    if type(fn) not in [str, pathlib.Path]:
        raise TypeError("`fn` must be a string or a pathlib.Path.")
    if input is not None:
        if type(input) not in [str, pathlib.Path]:
            raise TypeError("`input` must be a string or a pathlib.Path.")
    if ligand is not None:
        if type(ligand) not in [str, pathlib.Path]:
            raise TypeError("`ligand` must be a string or a pathlib.Path.")
    if output is not None:
        if type(output) not in [str, pathlib.Path]:
            raise TypeError("`output` must be a string or a pathlib.Path.")
    if output_hydropathy is not None:
        if type(output_hydropathy) not in [str, pathlib.Path]:
            raise TypeError("`output_hydropathy` must be a string or a pathlib.Path.")
    if volume is not None:
        if type(volume) not in [dict]:
            raise TypeError("`volume` must be a dictionary.")
    if area is not None:
        if type(area) not in [dict]:
            raise TypeError("`area` must be a dictionary.")
    if max_depth is not None:
        if type(max_depth) not in [dict]:
            raise TypeError("`max_depth` must be a dictionary.")
    if avg_depth is not None:
        if type(avg_depth) not in [dict]:
            raise TypeError("`avg_depth` must be a dictionary.")
    if avg_hydropathy is not None:
        if type(avg_hydropathy) not in [dict]:
            raise TypeError("`avg_hydropathy` must be a dictionary.")
    if residues is not None:
        if type(residues) not in [dict]:
            raise TypeError("`residues` must be a dictionary.")
    if frequencies is not None:
        if type(frequencies) not in [dict]:
            raise TypeError("`frequencies` must be a dictionary.")
    if type(step) not in [float, int]:
        raise TypeError("`step` must be a positive real number.")
    elif step <= 0.0:
        raise ValueError("`step` must be a positive real number.")

    # Convert types
    if type(step) == int:
        step = float(step)

    # Create base directories of results
    os.makedirs(os.path.abspath(os.path.dirname(fn)), exist_ok=True)

    # Prepare paths
    input = os.path.abspath(input)
    if ligand:
        ligand = os.path.abspath(ligand)
    if output:
        output = os.path.abspath(output)
    if output_hydropathy:
        output_hydropathy = os.path.abspath(output_hydropathy)

    # Create results dictionary
    results = {
        "FILES": {
            "INPUT": input,
            "LIGAND": ligand,
            "OUTPUT": output,
            "HYDROPATHY": output_hydropathy,
        },
        "PARAMETERS": {
            "STEP": step,
        },
        "RESULTS": {
            "VOLUME": volume,
            "AREA": area,
            "MAX_DEPTH": max_depth,
            "AVG_DEPTH": avg_depth,
            "AVG_HYDROPATHY": avg_hydropathy,
            "RESIDUES": residues,
            "FREQUENCY": frequencies,
        },
    }

    # Create base directories of results TOML file
    os.makedirs(os.path.abspath(os.path.dirname(fn)), exist_ok=True)

    # Write results to TOML file
    with open(fn, "w") as f:
        f.write("# pyKVFinder results\n\n")
        toml.dump(results, f)