import os
import logging
import argparse
import pathlib
import numpy
from typing import Dict, List, Union, Optional
# Public API of this module.
# NOTE(review): the body of this list was missing from the source; the entries
# below are the public (non-underscore) functions defined in this file.
# Confirm against the package before release.
__all__ = [
    "read_vdw",
    "read_pdb",
    "read_xyz",
    "read_cavity",
    "calculate_frequencies",
    "plot_frequencies",
    "write_results",
]

# Path of the van der Waals radii file, resolved relative to this module.
VDW = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data/vdw.dat")
def read_vdw(
    fn: Optional[Union[str, pathlib.Path]] = None
) -> Dict[str, Dict[str, float]]:
    """Reads van der Waals radii from .dat file.

    Parameters
    ----------
    fn : Optional[Union[str, pathlib.Path]], optional
        A path to a van der Waals radii file, by default None. If None, apply
        the built-in van der Waals radii file: `vdw.dat`.

    Returns
    -------
    vdw : Dict[str, Dict[str, float]]
        A dictionary containing radii values, keyed by residue name and then
        by atom name.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.
    ValueError
        A line in `vdw` has incorrect format. The values must be double
        tab-separated.
    ValueError
        A line in `vdw` has an incorrect radius type for an atom.

    Note
    ----
    The van der Waals radii file defines the radius values for each
    atom by residue and when not defined, it uses a generic value
    based on the atom type (see `van der Waals file template`).
    The package contains a built-in van der Waals radii file: `vdw.dat`.

    File layout as parsed here: lines starting with ``#`` are ignored, a line
    starting with ``>`` opens a residue section, and every other non-blank
    line is ``<atom><TAB><TAB><radius>``. Spaces are discarded everywhere.
    """
    # Check argument
    if fn is None:
        # Define default vdw file
        fn = VDW
    elif not isinstance(fn, (str, pathlib.Path)):
        # isinstance (not an exact type match) so that concrete Path
        # subclasses such as PosixPath/WindowsPath are accepted.
        raise TypeError("`fn` must be a string or a pathlib.Path.")

    # Create vdw dictionary
    vdw: Dict[str, Dict[str, float]] = {}
    # Residue section currently being filled (None until the first '>' line)
    res = None

    with open(fn, "r") as f:
        for raw in f.read().splitlines():
            # Spaces carry no meaning; only double tabs separate the fields
            line = raw.replace(" ", "")

            # Ignore blank lines (including whitespace-only ones) and comments
            if not line.strip() or line.startswith("#"):
                continue

            # Residue header
            if line.startswith(">"):
                res = line.replace(">", "").replace("\t\t", "")
                vdw[res] = {}
                continue

            # Atom entry: exactly two double-tab separated fields, and it must
            # belong to a residue section.
            fields = line.split("\t\t")
            if len(fields) != 2 or res is None:
                raise ValueError(
                    "A line in `vdw` has incorrect format. "
                    "The values must be double tab-separated."
                )
            atom, radius = fields

            try:
                vdw[res][atom] = float(radius)
            except ValueError as err:
                raise ValueError(
                    "A line in `vdw` has an incorrect radius type for an atom."
                ) from err

    return vdw
def _process_pdb_line(
line: str, vdw: Dict[str, Dict[str, float]]
) -> List[Union[str, float, int]]:
"""Extracts ATOM and HETATM information of PDB line.
line : str
A line of a valid PDB file
vdw : Dict[str, Dict[str, Dict[str, float]]]
A dictionary containing radii values.
atomic : List[Union[str, float, int]]
A list with resnum, chain, resname, atom name, xyz coordinates and radius.
# Get PDB infomation
atom = line[12:16].strip()
resname = line[17:20].strip()
resnum = int(line[22:26])
chain = line[21]
x = float(line[30:38])
y = float(line[38:46])
z = float(line[46:54])
atom_symbol = line[76:78].strip().upper()
# Get atom and radius from vdw
if resname in vdw.keys() and atom in vdw[resname].keys():
radius = vdw[resname][atom]
radius = vdw["GEN"][atom_symbol]
f"Warning: Atom {atom} of residue {resname} \
not found in dictionary."
f"Warning: Using generic atom {atom_symbol} \
radius: {radius} \u00c5."
# Prepare output
atomic = [resnum, chain, resname, atom, x, y, z, radius]
return atomic
def read_pdb(
    fn: Union[str, pathlib.Path],
    vdw: Optional[Dict[str, Dict[str, float]]] = None,
    model: Optional[int] = None,
) -> numpy.ndarray:
    """Reads PDB file into numpy.ndarrays.

    Parameters
    ----------
    fn : Union[str, pathlib.Path]
        A path to PDB file.
    vdw : Dict[str, Dict[str, float]], optional
        A dictionary containing radii values, by default None. If None, use output of `pyKVFinder.read_vdw()`.
    model : int, optional
        Model number, by default None. If None, keep atoms from all models.

    Returns
    -------
    atomic : numpy.ndarray
        A numpy array with atomic data (residue number, chain, residue name, atom name, xyz coordinates
        and radius) for each atom.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.
    TypeError
        `model` must be an integer.

    Note
    ----
    The van der Waals radii file defines the radius values for each atom
    by residue and when not defined, it uses a generic value based on the
    atom type. The function by default loads the built-in van der Waals radii
    file: `vdw.dat`.
    """
    # Check arguments
    # isinstance (not an exact type match) so that concrete Path subclasses
    # such as PosixPath/WindowsPath are accepted.
    if not isinstance(fn, (str, pathlib.Path)):
        raise TypeError("`fn` must be a string or a pathlib.Path.")
    # Exact type match: bool (a subclass of int) is not accepted as a model.
    if model is not None and type(model) is not int:
        raise TypeError("`model` must be an integer.")

    # Define default vdw file
    if vdw is None:
        vdw = read_vdw(VDW)

    # Create lists
    atomic = []

    # Without a model filter every record is kept; with one, records are
    # skipped until a matching MODEL record switches `keep` on.
    keep = model is None

    # Read file and process atoms
    with open(fn, "r") as f:
        for line in f:
            if model is not None and line.startswith("MODEL"):
                # int() ignores the trailing newline left after removing spaces
                keep = int(line[5:].replace(" ", "")) == model
            if keep and line.startswith(("ATOM", "HETATM")):
                atomic.append(_process_pdb_line(line, vdw))

    return numpy.asarray(atomic)
def read_xyz(
    fn: Union[str, pathlib.Path], vdw: Optional[Dict[str, Dict[str, float]]] = None
) -> numpy.ndarray:
    """Reads XYZ file into numpy.ndarrays.

    Parameters
    ----------
    fn : Union[str, pathlib.Path]
        A path to XYZ file.
    vdw : Dict[str, Dict[str, float]], optional
        A dictionary containing radii values, by default None. If None, use output of `pyKVFinder.read_vdw()`.

    Returns
    -------
    atomic : numpy.ndarray
        A numpy array with atomic data (residue number, chain, residue name, atom name, xyz coordinates
        and radius) for each atom.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.

    Note
    ----
    The van der Waals radii file defines the radius values for each atom
    by residue and when not defined, it uses a generic value based on the
    atom type. The function by default loads the built-in van der Waals radii
    file: `vdw.dat`.

    Only lines with exactly four whitespace-separated fields
    (``symbol x y z``) are read. Every atom becomes its own residue, numbered
    from 1, with chain ``A`` and residue name ``UNK``, and takes its radius
    from the ``"GEN"`` entry of `vdw`.
    """
    # Check arguments
    # isinstance (not an exact type match) so that concrete Path subclasses
    # such as PosixPath/WindowsPath are accepted.
    if not isinstance(fn, (str, pathlib.Path)):
        raise TypeError("`fn` must be a string or a pathlib.Path.")

    # Define default vdw file
    if vdw is None:
        vdw = read_vdw(VDW)

    # Create lists
    atomic = []

    # Read XYZ file
    with open(fn, "r") as f:
        for line in f:
            fields = line.split()
            # Header/comment lines do not have exactly four fields
            if len(fields) != 4:
                continue

            atom_symbol = fields[0].upper()
            x, y, z = (float(value) for value in fields[1:])

            # Get radius (generic value)
            radius = vdw["GEN"][atom_symbol]

            # One residue per atom, numbered sequentially from 1
            resnum = len(atomic) + 1
            atomic.append([resnum, "A", "UNK", atom_symbol, x, y, z, radius])

    return numpy.asarray(atomic)
def _read_cavity(cavity: Union[str, pathlib.Path]) -> numpy.ndarray:
    """Reads xyz coordinates and labels of a cavities file into numpy.ndarray.

    Parameters
    ----------
    cavity : Union[str, pathlib.Path]
        A path to a PDB-formatted file of cavities.

    Returns
    -------
    xyzl : numpy.ndarray
        A numpy.ndarray with xyz coordinates and cavity label for each cavity point.
    """
    from .grid import _get_cavity_label

    # Create xyzl (xyz coordinates and cavity label)
    xyzl = []

    # Read cavity file into list
    with open(cavity, "r") as f:
        for line in f:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
            # The residue-name column holds the cavity name; the helper turns
            # it into the label stored next to the coordinates.
            label = _get_cavity_label(line[17:20].strip())
            xyzl.append([x, y, z, label])

    return numpy.asarray(xyzl)
def read_cavity(
    cavity: Union[str, pathlib.Path],
    receptor: Union[str, pathlib.Path],
    step: Union[float, int] = 0.6,
    probe_in: Union[float, int] = 1.4,
    probe_out: Union[float, int] = 4.0,
    surface: str = "SES",
    vdw: Optional[Dict[str, Dict[str, float]]] = None,
    nthreads: Optional[int] = None,
    verbose: bool = False,
) -> numpy.ndarray:
    """Read cavities and receptor inside a 3D grid.

    Parameters
    ----------
    cavity : Union[str, pathlib.Path]
        A path to a PDB file of cavities.
    receptor : Union[str, pathlib.Path]
        A path to a PDB or XYZ file of the receptor.
    step : Union[float, int], optional
        Grid spacing (A), by default 0.6.
    probe_in : Union[float, int], optional
        Probe In size (A), by default 1.4.
    probe_out : Union[float, int], optional
        Probe Out size (A), by default 4.0.
    surface : str, optional
        Surface representation. Keywords options are SES (Solvent Excluded Surface) or SAS (Solvent
        Accessible Surface), by default "SES".
    vdw : Dict[str, Dict[str, float]], optional
        A dictionary containing radii values, by default None. If None, use output of `pyKVFinder.read_vdw()`.
    nthreads : Optional[int], optional
        Number of threads, by default None. If None, the number of threads is
        `os.cpu_count() - 1`, but never less than 1.
    verbose : bool, optional
        Print extra information to standard output, by default False.

    Returns
    -------
    grid : numpy.ndarray
        Cavity and receptor points in the 3D grid (grid[nx][ny][nz]).
        Grid array has integer labels in each position, that are:

            * -1: bulk points or empty space points;

            * 0: biomolecule points;

            * >=2: cavity points.

    Raises
    ------
    TypeError
        `cavity` must be a string or a pathlib.Path.
    TypeError
        `receptor` must be a string or a pathlib.Path.
    TypeError
        `receptor` must have .pdb or .xyz extension.
    TypeError
        `step` must be a positive real number.
    ValueError
        `step` must be a positive real number.
    TypeError
        `probe_in` must be a non-negative real number.
    ValueError
        `probe_in` must be a non-negative real number.
    TypeError
        `probe_out` must be a non-negative real number.
    ValueError
        `probe_out` must be a non-negative real number.
    ValueError
        `probe_out` must be greater than `probe_in`.
    TypeError
        `surface` must be a str.
    TypeError
        `nthreads` must be a positive integer.
    ValueError
        `nthreads` must be a positive integer.
    TypeError
        `verbose` must be a boolean.
    ValueError
        `surface` must be SAS or SES, not {surface}.
    """
    from .grid import get_vertices, _get_sincos, _get_dimensions
    from _pyKVFinder import _fill_receptor, _fill_cavity

    # Check arguments
    # isinstance (not an exact type match) so that concrete Path subclasses
    # such as PosixPath/WindowsPath are accepted.
    if not isinstance(cavity, (str, pathlib.Path)):
        raise TypeError("`cavity` must be a string or a pathlib.Path.")
    if not isinstance(receptor, (str, pathlib.Path)):
        raise TypeError("`receptor` must be a string or a pathlib.Path.")
    # Work on the string form from here on: the extension checks need str
    # methods, and the readers below then always receive a plain string.
    receptor = os.fspath(receptor)
    if not receptor.endswith((".pdb", ".xyz")):
        raise TypeError("`receptor` must have .pdb or .xyz extension.")
    if type(step) not in [float, int]:
        raise TypeError("`step` must be a positive real number.")
    elif step <= 0.0:
        raise ValueError("`step` must be a positive real number.")
    if type(probe_in) not in [float, int]:
        raise TypeError("`probe_in` must be a non-negative real number.")
    elif probe_in < 0.0:
        raise ValueError("`probe_in` must be a non-negative real number.")
    if type(probe_out) not in [float, int]:
        raise TypeError("`probe_out` must be a non-negative real number.")
    elif probe_out < 0.0:
        raise ValueError("`probe_out` must be a non-negative real number.")
    elif probe_out < probe_in:
        raise ValueError("`probe_out` must be greater than `probe_in`.")
    if type(surface) not in [str]:
        raise TypeError("`surface` must be a str.")
    if nthreads is None:
        # os.cpu_count() may return None, and on a single-core machine
        # "count - 1" would be 0 and fail the check below.
        nthreads = max(1, (os.cpu_count() or 1) - 1)
    if type(nthreads) not in [int]:
        raise TypeError("`nthreads` must be a positive integer.")
    elif nthreads <= 0:
        raise ValueError("`nthreads` must be a positive integer.")
    if type(verbose) not in [bool]:
        raise TypeError("`verbose` must be a boolean.")

    # Convert types
    step = float(step)
    probe_in = float(probe_in)
    probe_out = float(probe_out)

    # Insert receptor inside 3D grid
    if verbose:
        print(f"> Inserting {receptor} into 3D grid")

    # Define default vdw file
    if vdw is None:
        vdw = read_vdw(VDW)

    # Load receptor coordinates and radii
    if receptor.endswith(".pdb"):
        atomic = read_pdb(receptor, vdw)
    else:
        # Extension was validated above, so this is a .xyz file
        atomic = read_xyz(receptor, vdw)

    # Extract xyzr from atomic (columns 4 onwards: x, y, z, radius)
    xyzr = atomic[:, 4:].astype(numpy.float64)

    # Get vertices
    vertices = get_vertices(atomic, probe_out, step)

    # Get sincos
    sincos = _get_sincos(vertices)

    # Get dimensions
    nx, ny, nz = _get_dimensions(vertices, step)

    # Unpack vertices (only the first one is passed on below)
    P1, _, _, _ = vertices

    # Calculate number of voxels
    nvoxels = nx * ny * nz

    # Translate the surface keyword into the boolean flag passed to the
    # native routine
    if surface == "SES":
        if verbose:
            print("> Surface representation: Solvent Excluded Surface (SES).")
        is_ses = True
    elif surface == "SAS":
        if verbose:
            print("> Surface representation: Solvent Accessible Surface (SAS).")
        is_ses = False
    else:
        raise ValueError(f"`surface` must be SAS or SES, not {surface}.")

    # Fill grid with receptor
    # NOTE(review): the argument list of this call was missing from the
    # source. It is reconstructed from the values prepared above - confirm the
    # order against the `_pyKVFinder._fill_receptor` signature.
    grid = _fill_receptor(
        nvoxels,
        nx,
        ny,
        nz,
        xyzr,
        P1,
        sincos,
        step,
        probe_in,
        is_ses,
        nthreads,
        verbose,
    ).reshape(nx, ny, nz)

    # Insert cavities inside 3D grid
    if verbose:
        print(f"> Inserting {cavity} into 3D grid")

    # Load cavities coordinates and labels
    xyzl = _read_cavity(cavity)

    # Fill grid with cavities
    _fill_cavity(grid, xyzl, P1, sincos, step, nthreads)

    return grid
def _process_box(args: argparse.Namespace) -> Dict[str, List[float]]:
"""Gets xyz coordinates of 3D grid vertices.
args (argparse.Namespace)
Arguments passes by argparser CLI.
box : Dict[str, List[float]]
A dictionary with a xyz coordinates (p1: origin,
p2: X-axis, p3: Y-axis, p4: Z-axis) for each point.
# Create box parameter
box = {
"p1": args.vertices[0],
"p2": args.vertices[1],
"p3": args.vertices[2],
"p4": args.vertices[3],
# Adjust if box adjustment mode
# Get probe out additions
# p1 = (x1, y1, z1)
x1 = (
-(args.probe_out * args.sincos[3])
- (args.probe_out * args.sincos[0] * args.sincos[2])
+ (args.probe_out * args.sincos[1] * args.sincos[2])
y1 = -(args.probe_out * args.sincos[1]) - (args.probe_out * args.sincos[0])
z1 = (
-(args.probe_out * args.sincos[2])
+ (args.probe_out * args.sincos[0] * args.sincos[3])
- (args.probe_out * args.sincos[1] * args.sincos[3])
# p2 = (x2, y2, z2)
x2 = (
(args.probe_out * args.sincos[3])
- (args.probe_out * args.sincos[0] * args.sincos[2])
+ (args.probe_out * args.sincos[1] * args.sincos[2])
y2 = -(args.probe_out * args.sincos[1]) - (args.probe_out * args.sincos[0])
z2 = (
(args.probe_out * args.sincos[2])
+ (args.probe_out * args.sincos[0] * args.sincos[3])
- (args.probe_out * args.sincos[1] * args.sincos[3])
# p3 = (x3, y3, z3)
x3 = (
-(args.probe_out * args.sincos[3])
+ (args.probe_out * args.sincos[0] * args.sincos[2])
+ (args.probe_out * args.sincos[1] * args.sincos[2])
y3 = (args.probe_out * args.sincos[1]) - (args.probe_out * args.sincos[0])
z3 = (
-(args.probe_out * args.sincos[2])
- (args.probe_out * args.sincos[0] * args.sincos[3])
- (args.probe_out * args.sincos[1] * args.sincos[3])
# p4 = (x4, y4, z4)
x4 = (
-(args.probe_out * args.sincos[3])
- (args.probe_out * args.sincos[0] * args.sincos[2])
- (args.probe_out * args.sincos[1] * args.sincos[2])
y4 = -(args.probe_out * args.sincos[1]) + (args.probe_out * args.sincos[0])
z4 = (
-(args.probe_out * args.sincos[2])
+ (args.probe_out * args.sincos[0] * args.sincos[3])
+ (args.probe_out * args.sincos[1] * args.sincos[3])
# Remove probe out addition
box["p1"] -= numpy.array([x1, y1, z1])
box["p2"] -= numpy.array([x2, y2, z2])
box["p3"] -= numpy.array([x3, y3, z3])
box["p4"] -= numpy.array([x4, y4, z4])
# Prepare to dict to toml module
box["p1"] = numpy.around(box["p1"], 2).tolist()
box["p2"] = numpy.around(box["p2"], 2).tolist()
box["p3"] = numpy.around(box["p3"], 2).tolist()
box["p4"] = numpy.around(box["p4"], 2).tolist()
return box
def _write_parameters(args: argparse.Namespace) -> None:
    """Writes parameters used in cavity detection and characterization of
    pyKVFinder to TOML-formatted file.

    The file is written to
    ``<args.output_directory>/<args.base_name>.parameters.toml``.

    Parameters
    ----------
    args : argparse.Namespace
        Arguments passed by argparser CLI.
    """
    import toml

    # Parameters filename
    fn = os.path.join(args.output_directory, f"{args.base_name}.parameters.toml")

    # Parameters dict
    # NOTE(review): the closing braces, the "SETTINGS" and "PARAMETERS"
    # section keys and the "BOX_ADJUSTMENT" entry were missing from the
    # source. The nesting below is a reconstruction - confirm it against an
    # existing parameters file before relying on the layout.
    parameters = {
        "FILES": {
            "INPUT": args.input,
            "LIGAND": args.ligand,
            "BASE_NAME": args.base_name,
            "OUTPUT_DIRECTORY": args.output_directory,
            "DICTIONARY": args.dictionary,
        },
        "SETTINGS": {
            "MODES": {
                "BOX_ADJUSTMENT": args.box,
                "LIGAND_ADJUSTMENT": bool(args.ligand),
                "DEPTH": args.depth,
                "SURFACE": args.surface,
                "IGNORE_BACKBONE": args.ignore_backbone,
            },
            "PARAMETERS": {
                "STEP": args.step,
                "PROBE_IN": args.probe_in,
                "PROBE_OUT": args.probe_out,
                "VOLUME_CUTOFF": args.volume_cutoff,
                "LIGAND_CUTOFF": args.ligand_cutoff,
                "REMOVAL_DISTANCE": args.removal_distance,
            },
            "BOX": _process_box(args),
        },
    }

    # Write to TOML file
    with open(fn, "w") as param:
        toml.dump(parameters, param)
# Residue names counted into each class by `calculate_frequencies`.
_RESIDUE_CLASSES = {
    "R1": ("ALA", "GLY", "ILE", "LEU", "PRO", "VAL"),
    "R2": ("PHE", "TRP", "TYR"),
    "R3": ("ASN", "CYS", "GLN", "MET", "SER", "THR"),
    "R4": ("ASP", "GLU"),
    "R5": ("ARG", "HIS", "LYS"),
}


def calculate_frequencies(
    residues: Dict[str, List[List[str]]]
) -> Dict[str, Dict[str, Dict[str, int]]]:
    """Calculate frequencies of residues and class of residues
    (R1, R2, R3, R4 and R5) for detected cavities.

    Parameters
    ----------
    residues : Dict[str, List[List[str]]]
        A dictionary with a list of interface residues for each detected
        cavity. The residue name is read from the third item of each entry.

    Returns
    -------
    frequencies : Dict[str, Dict[str, Dict[str, int]]]
        A dictionary with frequencies of residues and class for
        residues of each detected cavity. Each cavity maps to
        ``{"RESIDUES": {...}, "CLASS": {...}}``; residue names are sorted
        alphabetically.

    Note
    ----
    The cavity nomenclature is based on the integer label. The cavity
    marked with 2, the first integer corresponding to a cavity, is KAA, the
    cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on.

    Note
    ----
    The classes of residues, as counted by this function, are:

    * Aliphatic apolar (R1): Alanine, Glycine, Isoleucine, Leucine, Proline, Valine.

    * Aromatic (R2): Phenylalanine, Tryptophan, Tyrosine.

    * Polar Uncharged (R3): Asparagine, Cysteine, Glutamine, Methionine, Serine, Threonine.

    * Negatively charged (R4): Aspartate, Glutamate.

    * Positively charged (R5): Arginine, Histidine, Lysine.

    * Non-standard (RX): Non-standard residues.

    NOTE(review): an earlier revision of this docstring listed Methionine
    under R1 and Proline under R3, which is not what the code counts. The
    counting is left unchanged here; confirm which grouping is intended.
    """
    from collections import Counter

    # Create a dict for frequencies
    frequencies = {}

    # Get cavity name and residues list for each detected cavity
    for name, reslist in residues.items():
        # Count occurrences of each residue name
        counts = Counter(res[2] for res in reslist)

        # Residue frequencies, ordered by residue name
        per_residue = {resname: counts[resname] for resname in sorted(counts)}

        # Class frequencies (a Counter yields 0 for absent residues)
        per_class = {
            tag: sum(counts[resname] for resname in members)
            for tag, members in _RESIDUE_CLASSES.items()
        }
        # Everything not covered by R1-R5 is non-standard
        per_class["RX"] = sum(counts.values()) - sum(per_class.values())

        frequencies[name] = {
            "RESIDUES": per_residue,
            "CLASS": per_class,
        }

    return frequencies
def plot_frequencies(
    frequencies: Dict[str, Dict[str, Dict[str, int]]],
    fn: Union[str, pathlib.Path] = "barplots.pdf",
) -> None:
    """Plot bar charts of calculated frequencies (residues and classes of
    residues) for each detected cavity in a target PDF file.

    One page is written per cavity: residue frequencies on the left and class
    frequencies on the right, both on a y axis shared across all pages.

    Parameters
    ----------
    frequencies : Dict[str, Dict[str, Dict[str, int]]]
        A dictionary with frequencies of residues and class for
        residues of each detected cavity. It is not modified.
    fn : Union[str, pathlib.Path], optional
        A path to PDF file for plotting bar charts of frequencies, by
        default `barplots.pdf`.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.

    Note
    ----
    The cavity nomenclature is based on the integer label. The cavity
    marked with 2, the first integer corresponding to a cavity, is KAA, the
    cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on.

    Note
    ----
    The classes of residues shown in the legend are: Aliphatic apolar (R1),
    Aromatic (R2), Polar uncharged (R3), Negatively charged (R4), Positively
    charged (R5) and Non-standard (RX). The class counts are taken as given in
    `frequencies`.

    NOTE(review): the colour tables, the bar/legend calls and the save calls
    were missing from the source and are reconstructed here; only the
    "tab:gray" colour for extra residues, the axis settings, the legend labels
    and `loc="lower center"` are taken from the visible lines. Residue bars
    colour PRO with the R1 colour and MET with the R3 colour, matching how
    `calculate_frequencies` counts them, whereas an earlier revision of this
    docstring listed Methionine under R1 and Proline under R3. Confirm the
    palette and styling before release.
    """
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    # Check arguments
    # isinstance (not an exact type match) so that concrete Path subclasses
    # such as PosixPath/WindowsPath are accepted.
    if not isinstance(fn, (str, pathlib.Path)):
        raise TypeError("`fn` must be a string or a pathlib.Path.")

    # Create base directories of output PDF file
    os.makedirs(os.path.abspath(os.path.dirname(fn)), exist_ok=True)

    # Colour of each class bar, in legend order
    class_colors = {
        "R1": "tab:cyan",
        "R2": "tab:orange",
        "R3": "tab:green",
        "R4": "tab:red",
        "R5": "tab:purple",
        "RX": "tab:gray",
    }

    # Colour of each standard amino acid bar (alphabetical order); any other
    # residue is drawn in gray.
    residue_colors = {
        "ALA": "tab:cyan",
        "ARG": "tab:purple",
        "ASN": "tab:green",
        "ASP": "tab:red",
        "CYS": "tab:green",
        "GLN": "tab:green",
        "GLU": "tab:red",
        "GLY": "tab:cyan",
        "HIS": "tab:purple",
        "ILE": "tab:cyan",
        "LEU": "tab:cyan",
        "LYS": "tab:purple",
        "MET": "tab:green",
        "PHE": "tab:orange",
        "PRO": "tab:cyan",
        "SER": "tab:green",
        "THR": "tab:green",
        "TRP": "tab:orange",
        "TYR": "tab:orange",
        "VAL": "tab:cyan",
    }

    # Standardize data: every page shows all standard amino acids (zero when
    # absent) followed by any extra residues. Built as new dictionaries so the
    # caller's `frequencies` is left untouched.
    standard = dict.fromkeys(residue_colors, 0)
    residue_data = {
        cavity_tag: {**standard, **data["RESIDUES"]}
        for cavity_tag, data in frequencies.items()
    }

    # Shared y limit: one above the largest class frequency of any cavity
    ymax = 1 + max(
        (max(data["CLASS"].values(), default=0) for data in frequencies.values()),
        default=0,
    )

    # Legend
    labels = [
        r"Aliphatic apolar",
        r"Aromatic",
        r"Polar uncharged",
        r"Negatively charged",
        r"Positively charged",
        r"Non-standard",
    ]
    legend_colors = list(class_colors.values())

    with PdfPages(fn) as pdf:
        # Pdf plots
        for cavity_tag, data in frequencies.items():
            # Create page
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 9), dpi=300)
            fig.suptitle(r"Cavity " + f"{cavity_tag}", fontsize=30)

            # Frequency residues
            x = list(residue_data[cavity_tag].keys())
            y = list(residue_data[cavity_tag].values())
            colors = [residue_colors.get(resname, "tab:gray") for resname in x]

            ax1.bar(x=x, height=y, align="center", edgecolor="black", color=colors)
            ax1.set_xlim(-1, len(x))
            ax1.tick_params(axis="x", labelsize=15, rotation=45)
            ax1.tick_params(axis="y", labelsize=20)
            ax1.set_ylabel(r"Frequency", fontsize=20)
            ax1.set_ylim(0, ymax)
            ax1.grid(which="major", axis="y", linestyle="--")

            # Frequency classes
            x = list(data["CLASS"].keys())
            y = list(data["CLASS"].values())
            colors = [class_colors.get(tag, "tab:gray") for tag in x]

            ax2.bar(x=x, height=y, align="center", edgecolor="black", color=colors)
            ax2.set_xlim(-1, len(x))
            ax2.tick_params(axis="x", labelsize=20)
            ax2.tick_params(axis="y", labelsize=20)
            ax2.set_ylim(0, ymax)
            ax2.grid(which="major", axis="y", linestyle="--")

            # Legend (one coloured patch per class)
            handles = [
                plt.Rectangle((0, 0), 1, 1, facecolor=color, edgecolor="black")
                for color in legend_colors
            ]
            fig.legend(
                handles,
                labels,
                fontsize=15,
                fancybox=True,
                shadow=True,
                loc="lower center",
                ncol=len(labels),
            )

            # Adjust plots (leave room at the bottom for the legend)
            fig.tight_layout()
            fig.subplots_adjust(bottom=0.12)

            # Save page
            pdf.savefig(fig)
            plt.close(fig)
def write_results(
    fn: Union[str, pathlib.Path],
    input: Optional[Union[str, pathlib.Path]],
    ligand: Optional[Union[str, pathlib.Path]],
    output: Optional[Union[str, pathlib.Path]],
    output_hydropathy: Optional[Union[str, pathlib.Path]] = None,
    volume: Optional[Dict[str, float]] = None,
    area: Optional[Dict[str, float]] = None,
    max_depth: Optional[Dict[str, float]] = None,
    avg_depth: Optional[Dict[str, float]] = None,
    avg_hydropathy: Optional[Dict[str, float]] = None,
    residues: Optional[Dict[str, List[List[str]]]] = None,
    frequencies: Optional[Dict[str, Dict[str, Dict[str, int]]]] = None,
    step: Union[float, int] = 0.6,
) -> None:
    """Writes file paths and cavity characterization to TOML-formatted file.

    Parameters
    ----------
    fn : Union[str, pathlib.Path]
        A path to TOML-formatted file for writing file paths and
        cavity characterization (volume, area, depth [optional] and interface
        residues) per cavity detected.
    input : Union[str, pathlib.Path], optional
        A path to input PDB or XYZ file.
    ligand : Union[str, pathlib.Path], optional
        A path to ligand PDB or XYZ file.
    output : Union[str, pathlib.Path], optional
        A path to cavity PDB file.
    output_hydropathy : Union[str, pathlib.Path], optional
        A path to hydropathy PDB file (surface points mapped with a
        hydrophobicity scale), by default None.
    volume : Dict[str, float], optional
        A dictionary with volume of each detected cavity, by default None.
    area : Dict[str, float], optional
        A dictionary with area of each detected cavity, by default None.
    max_depth : Dict[str, float], optional
        A dictionary with maximum depth of each detected cavity, by default
        None.
    avg_depth : Dict[str, float], optional
        A dictionary with average depth of each detected cavity, by default
        None.
    avg_hydropathy : Dict[str, float], optional
        A dictionary with average hydropathy of each detected cavity and range
        of the hydrophobicity scale mapped, by default None.
    residues : Dict[str, List[List[str]]], optional
        A dictionary with interface residues of each detected cavity, by
        default None.
    frequencies : Dict[str, Dict[str, Dict[str, int]]], optional
        A dictionary with frequencies of interface residues and classes of
        residues of each detected cavity, by default None.
    step : Union[float, int], optional
        Grid spacing (A), by default 0.6.

    Raises
    ------
    TypeError
        `fn` must be a string or a pathlib.Path.
    TypeError
        `input` must be a string or a pathlib.Path.
    TypeError
        `ligand` must be a string or a pathlib.Path.
    TypeError
        `output` must be a string or a pathlib.Path.
    TypeError
        `output_hydropathy` must be a string or a pathlib.Path.
    TypeError
        `volume` must be a dictionary.
    TypeError
        `area` must be a dictionary.
    TypeError
        `max_depth` must be a dictionary.
    TypeError
        `avg_depth` must be a dictionary.
    TypeError
        `avg_hydropathy` must be a dictionary.
    TypeError
        `residues` must be a dictionary.
    TypeError
        `frequencies` must be a dictionary.
    TypeError
        `step` must be a positive real number.
    ValueError
        `step` must be a positive real number.

    Note
    ----
    The cavity nomenclature is based on the integer label. The cavity
    marked with 2, the first integer corresponding to a cavity, is KAA, the
    cavity marked with 3 is KAB, the cavity marked with 4 is KAC and so on.
    """
    import toml

    # Check arguments
    # isinstance (not an exact type match) so that concrete Path subclasses
    # such as PosixPath/WindowsPath are accepted.
    if not isinstance(fn, (str, pathlib.Path)):
        raise TypeError("`fn` must be a string or a pathlib.Path.")
    optional_paths = (
        ("input", input),
        ("ligand", ligand),
        ("output", output),
        ("output_hydropathy", output_hydropathy),
    )
    for label, value in optional_paths:
        if value is not None and not isinstance(value, (str, pathlib.Path)):
            raise TypeError(f"`{label}` must be a string or a pathlib.Path.")
    optional_dicts = (
        ("volume", volume),
        ("area", area),
        ("max_depth", max_depth),
        ("avg_depth", avg_depth),
        ("avg_hydropathy", avg_hydropathy),
        ("residues", residues),
        ("frequencies", frequencies),
    )
    for label, value in optional_dicts:
        if value is not None and not isinstance(value, dict):
            raise TypeError(f"`{label}` must be a dictionary.")
    if type(step) not in [float, int]:
        raise TypeError("`step` must be a positive real number.")
    elif step <= 0.0:
        raise ValueError("`step` must be a positive real number.")

    # Convert types
    step = float(step)

    # Prepare paths (absolute, as plain strings)
    # `input` may be None, which os.path.abspath does not accept.
    if input is not None:
        input = os.path.abspath(input)
    if ligand:
        ligand = os.path.abspath(ligand)
    if output:
        output = os.path.abspath(output)
    if output_hydropathy:
        output_hydropathy = os.path.abspath(output_hydropathy)

    # Create results dictionary
    # NOTE(review): the closing braces and the "PARAMETERS" / "RESULTS"
    # section keys were missing from the source; this nesting is a
    # reconstruction - confirm it against an existing results file.
    results = {
        "FILES": {
            "INPUT": input,
            "LIGAND": ligand,
            "OUTPUT": output,
            "HYDROPATHY": output_hydropathy,
        },
        "PARAMETERS": {
            "STEP": step,
        },
        "RESULTS": {
            "VOLUME": volume,
            "AREA": area,
            "MAX_DEPTH": max_depth,
            "AVG_DEPTH": avg_depth,
            "AVG_HYDROPATHY": avg_hydropathy,
            "RESIDUES": residues,
            "FREQUENCY": frequencies,
        },
    }

    # Create base directories of results TOML file
    os.makedirs(os.path.abspath(os.path.dirname(fn)), exist_ok=True)

    # Write results to TOML file
    with open(fn, "w") as f:
        f.write("# pyKVFinder results\n\n")
        toml.dump(results, f)