Source code for propka.lib

"""
Set-up of a PROPKA calculation
==============================

Implements many of the main functions used to call PROPKA.
"""

import logging
import argparse
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, TYPE_CHECKING, NoReturn, Optional, Tuple, TypeVar

if TYPE_CHECKING:
    from propka.atom import Atom
    from propka.conformation_container import ConformationContainer


T = TypeVar("T")
Number = TypeVar("Number", int, float)

_T_RESIDUE_TUPLE = Tuple[str, int, str]

_LOGGER = logging.getLogger(__name__)


EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}


class Options:
    # Note: All the "NoReturn" members appear to be unused
    chains: Optional[List[str]]
    display_coupled_residues: bool = False
    filenames: List[str]  # List[Path]?
    grid: Tuple[float, float, float] = (0.0, 14.0, 0.1)
    input_pdb: str  # Path?
    keep_protons: bool = False
    log_level: str = 'INFO'
    pH: NoReturn  # float = 7.0
    parameters: Path
    protonate_all: bool = False
    reference: NoReturn  # str = 'neutral'
    reuse_ligand_mol2_file: bool = False  # only used by unused function
    titrate_only: Optional[List[_T_RESIDUE_TUPLE]]
    window: Tuple[float, float, float] = (0.0, 14.0, 1.0)



[docs]
def protein_precheck(conformations: Dict[str, "ConformationContainer"],
                     names: Iterable[str]):
    """Check protein for correct number of atoms, etc.

    Args:
        names:  conformation names to check
    """
    for name in names:
        atoms = conformations[name].atoms
        # Group the atoms by their residue:
        atoms_by_residue: Dict[str, List[Atom]] = {}
        for atom in atoms:
            if atom.element != 'H':
                res_id = resid_from_atom(atom)
                try:
                    atoms_by_residue[res_id].append(atom)
                except KeyError:
                    atoms_by_residue[res_id] = [atom]
        for res_id, res_atoms in atoms_by_residue.items():
            res_name = res_atoms[0].res_name
            residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
            # ignore ligand residues
            if res_name not in EXPECTED_ATOM_NUMBERS:
                continue
            # check for c-terminal
            if 'C-' in [a.terminal for a in res_atoms]:
                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
                    str_ = ("Unexpected number ({num:d}) of atoms in residue "
                            "{res:s} in conformation {conf:s}".format(
                                num=len(res_atoms), res=residue_label,
                                conf=name))
                    _LOGGER.warning(str_)
                continue
            # check number of atoms in residue
            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
                str_ = ("Unexpected number ({num:d}) of atoms in residue "
                        "{res:s} in conformation {conf:s}".format(
                            num=len(res_atoms), res=residue_label,
                            conf=name))
                _LOGGER.warning(str_)




[docs]
def resid_from_atom(atom):
    """Return string with atom residue information.

    Args:
        atom:  atom to generate string for
    Returns
        string
    """
    return '{0:>4d} {1:s} {2:s}'.format(
        atom.res_num, atom.chain_id, atom.icode)




[docs]
def split_atoms_into_molecules(atoms: List["Atom"]):
    """Maps atoms into molecules.

    Args:
        atoms:  list of atoms
    Returns:
        list of molecules
    """
    molecules: List[List["Atom"]] = []
    while len(atoms) > 0:
        initial_atom = atoms.pop()
        molecules.append(make_molecule(initial_atom, atoms))
    return molecules




[docs]
def make_molecule(atom: "Atom", atoms: List["Atom"]):
    """Make a molecule from atoms.

    Args:
        atom:  one of the atoms
        atoms:  a list of the remaining atoms
    Return:
        list of atoms
    """
    bonded_atoms = [a for a in atoms if atom in a.bonded_atoms]
    res_atoms = [atom]
    for bond_atom in bonded_atoms:
        if bond_atom in atoms:
            atoms.remove(bond_atom)
            res_atoms.extend(make_molecule(bond_atom, atoms))
    return res_atoms




[docs]
def make_grid(min_: Number, max_: Number, step: Number) -> Iterator[Number]:
    """Make a grid across the specified tange.

    Like range() for integers or numpy.arange() for floats, except that `max_`
    is not excluded from the range.

    Args:
        min_:  minimum value of grid
        max_:  maximum value of grid
        step:  grid step size
    """
    x = min_
    while x <= max_:
        yield x
        x += step




[docs]
def generate_combinations(interactions: Iterable[T]) -> List[List[T]]:
    """Generate combinations of interactions.

    Args:
        interactions:  list of interactions
    Returns:
        list of combinations
    """
    res: List[List[T]] = [[]]
    for interaction in interactions:
        res = make_combination(res, interaction)
    res.remove([])
    return res




[docs]
def make_combination(combis: List[List[T]], interaction: T) -> List[List[T]]:
    """Make a specific set of combinations.

    Args:
        combis:  list of combinations
        interaction:  interaction to add to combinations
    Returns:
        list of combinations
    """
    res = []
    for combi in combis:
        res.append(combi+[interaction])
        res.append(combi)
    return res




[docs]
def parse_res_string(res_str: str) -> _T_RESIDUE_TUPLE:
    """Parse a residue string.

    Args:
        res_string:  residue string in format "chain:resnum[inscode]"
    Returns:
        a tuple of (chain, resnum, inscode).
    Raises:
        ValueError if the input string is invalid.
    """
    try:
        chain, resnum_str = res_str.split(":")
    except ValueError:
        raise ValueError("Invalid residue string (must contain 2 "
                         "colon-separated values)")
    try:
        resnum = int(resnum_str)
    except ValueError:
        try:
            resnum = int(resnum_str[:-1])
        except ValueError:
            raise ValueError("Invalid residue number (not an int)")
        else:
            inscode = resnum_str[-1]
    else:
        inscode = " "
    return chain, resnum, inscode



def parse_res_list(titrate_only: str):
    res_list: List[_T_RESIDUE_TUPLE] = []
    for res_str in titrate_only.split(','):
        try:
            res_list.append(parse_res_string(res_str))
        except ValueError as ex:
            raise argparse.ArgumentTypeError(f'{ex}: "{res_str:s}"')
    return res_list



[docs]
def build_parser(parser=None):
    """Build an argument parser for PROPKA.

    Args:
        parser:  existing parser. If this is not None, then the PROPKA parser
                 will be created as a subparser to this existing parser.
                 Otherwise, a new parser will be created.
    Returns:
        ArgumentParser object.


    .. versionchanged:: 3.4.0
       Argument `--generate-propka-input` has been removed as writing PROPKA
       input files is no longer supported.
    """
    import propka

    if parser is not None:
        group = parser.add_argument_group(title="PROPKA invocation options")
    else:
        parser = argparse.ArgumentParser(
            description=("PROPKA predicts the pKa values of ionizable "
                         "groups in proteins and protein-ligand "
                         "complexes based in the 3D structure"),
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        # This is duck-typing at its finest
        group = parser
        group.add_argument("input_pdb", help="read data from <filename>")
    group.add_argument(
        "-f", "--file", action="append", dest="filenames", default=[],
        help=(
            "read data from <filename>, i.e. <filename> is added to arguments"
        )
    )
    group.add_argument(
        "-r", "--reference", dest="reference", default="neutral",
        help=("setting which reference to use for stability calculations "
              "[neutral/low-pH]"))
    group.add_argument(
        "-c", "--chain", action="append", dest="chains",
        help=('creating the protein with only a specified chain. Specify '
              '" " for chains without ID [all]'))
    group.add_argument(
        "-i", "--titrate_only", dest="titrate_only",
        type=parse_res_list,
        help=('Treat only the specified residues as titratable. Value should '
              'be a comma-separated list of "chain:resnum" values; for '
              'example: -i "A:10,A:11"'))
    group.add_argument(
        "--version", action="version", version=f"%(prog)s {propka.__version__}")
    group.add_argument(
        "-p", "--parameters", dest="parameters",
        type=Path, default=Path(__file__).parent / "propka.cfg",
        help="set the parameter file")
    try:
        group.add_argument(
            "--log-level",
            choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
            help="logging level verbosity", default="INFO")
    except argparse.ArgumentError:
        # It is possible that --log-level has already been set by APBS
        pass
    group.add_argument(
        "-o", "--pH", dest="pH", type=float, default=7.0,
        help="setting pH-value used in e.g. stability calculations [7.0]")
    group.add_argument(
        "-w", "--window", dest="window", nargs=3, type=float,
        default=(0.0, 14.0, 1.0),
        help=("setting the pH-window to show e.g. stability profiles "
              "[0.0, 14.0, 1.0]"))
    group.add_argument(
        "-g", "--grid", dest="grid", nargs=3, type=float,
        default=(0.0, 14.0, 0.1),
        help=("setting the pH-grid to calculate e.g. stability "
              "related properties [0.0, 14.0, 0.1]"))
    group.add_argument(
        "-d", "--display-coupled-residues", dest="display_coupled_residues",
        action="store_true",
        help=("Displays alternative pKa values due "
              "to coupling of titratable groups"))
    group.add_argument(
        "-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file",
        action="store_true", default=False,
        help=("Reuses the ligand mol2 files allowing the user to alter "
              "ligand bond orders"))
    group.add_argument(
        "-k", "--keep-protons", dest="keep_protons", action="store_true",
        help="Keep protons in input file", default=False)
    group.add_argument(
        "-q", "--quiet", action="store_const", const="WARNING",
        dest="log_level", help="suppress non-warning messages")
    group.add_argument(
        "--protonate-all", dest="protonate_all", action="store_true",
        help="Protonate all atoms (will not influence pKa calculation)",
        default=False)
    return parser




[docs]
def loadOptions(args=None) -> Options:
    """
    Load the arguments parser with options. Note that verbosity is set as soon
    as this function is invoked.

    Arguments:
        args:  list of arguments
    Returns:
        argparse namespace
    """
    # loading the parser
    parser = build_parser()
    # parsing and returning options and arguments
    options = parser.parse_args(args, namespace=Options())

    # adding specified filenames to arguments
    options.filenames.append(options.input_pdb)
    # Set the no-print variable
    level = getattr(logging, options.log_level)
    _LOGGER.setLevel(level)
    # done!
    return options




[docs]
def make_tidy_atom_label(name, element):
    """Returns a 'tidier' atom label for printing to the new PDB file.

    Args:
        name:  atom name
        element:  atom element
    Returns:
        string
    """
    if len(name) > 4:  # if longer than 4, just truncate the name
        label = name[0:4]
    elif len(name) == 4:  # if length is 4, otherwise use the name as it is
        label = name
    else:  # if less than 4 characters long, insert white space as needed
        if len(element) == 1:
            label = ' {0:<3s}'.format(name)
        else:  # The element should occupy the two first chars
            label = '{0:<4s}'.format(name)
    return label