# Source code for propka.parameters

"""
Configuration file parameters
=============================

Holds parameters and settings that can be set in :file:`propka.cfg`. The file
format consists of lines of  ``keyword value [value ...]``, blank lines, and
comment lines (introduced with ``#``).

The module attributes below list the names and types of all keywords
in the configuration file.

"""
import logging
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Sequence, Tuple, TypeVar, Union

try:
    # New in version 3.10, deprecated since version 3.12
    from typing import TypeAlias
except ImportError:
    TypeAlias = "TypeAlias"  # type: ignore


# Module-level logger; the ``print_*`` report methods and the duplicate-entry
# warning in ``PairwiseMatrix.insert`` write through it.
_LOGGER = logging.getLogger(__name__)


class squared_property:
    """Descriptor exposing ``<name>_squared`` as a view on attribute ``<name>``.

    Reading the descriptor returns the square of the sibling attribute;
    assigning to it stores the square root in the sibling attribute. The
    sibling name is derived from the name the descriptor is bound to, which
    therefore has to end in ``_squared``.
    """

    _SUFFIX = "_squared"

    def __set_name__(self, owner, name: str):
        """Record the name of the non-squared sibling attribute.

        Args:
            owner: class the descriptor is being attached to
            name: attribute name the descriptor is bound to

        Raises:
            ValueError: if ``name`` does not end in ``_squared``
        """
        # Explicit raise instead of ``assert``: under ``python -O`` an assert
        # is stripped and the slice below would silently chop a wrong name.
        if not name.endswith(self._SUFFIX):
            raise ValueError(
                f"squared_property must be bound to a name ending in "
                f"{self._SUFFIX!r}, got {name!r}")
        self._name_not_squared = name[:-len(self._SUFFIX)]  # removesuffix()

    def __get__(self, instance, owner=None) -> float:
        """Return the squared value, or the descriptor on class access."""
        if instance is None:
            return self  # type: ignore[return-value]
        return getattr(instance, self._name_not_squared)**2

    def __set__(self, instance, value: float):
        """Store the square root of ``value`` in the sibling attribute.

        Raises:
            ValueError: if ``value`` is negative
        """
        # ``negative ** 0.5`` yields a complex number in Python 3, which would
        # be stored silently; reject it instead.
        if value < 0:
            raise ValueError(
                f"{self._name_not_squared}{self._SUFFIX} must be "
                f"non-negative, got {value!r}")
        setattr(instance, self._name_not_squared, value**0.5)


# Result type of the converter callable passed to ``Parameters.parse_parameter``.
T = TypeVar("T")

# Annotation objects used on the ``Parameters`` fields.
# ``Parameters.parse_line`` looks up a keyword's annotation and compares it to
# these objects with ``is`` to pick the parser for that line, so they act as
# identity markers as well as type hints.
# The two matrix aliases are plain strings (forward references) because the
# classes they name are defined later in this file.
_T_MATRIX: TypeAlias = "InteractionMatrix"
_T_PAIR_WISE_MATRIX: TypeAlias = "PairwiseMatrix"
_T_NUMBER_DICTIONARY = Dict[str, float]
_T_LIST_DICTIONARY = Dict[str, list]
_T_STRING_DICTIONARY = Dict[str, str]
_T_STRING_LIST = List[str]
_T_STRING = str
_T_BOOL = bool


@dataclass
class Parameters:
    """PROPKA parameter class.

    Each annotated attribute is a configuration keyword. ``parse_line`` uses
    the attribute's annotation to decide how the words following the keyword
    are converted and stored.
    """

    # MATRICES
    interaction_matrix: _T_MATRIX = field(
        default_factory=lambda: InteractionMatrix("interaction_matrix"))

    # PAIR_WISE_MATRICES
    sidechain_cutoffs: _T_PAIR_WISE_MATRIX = field(
        default_factory=lambda: PairwiseMatrix("sidechain_cutoffs"))

    # NUMBER_DICTIONARIES
    VanDerWaalsVolume: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    charge: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    model_pkas: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    ions: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    valence_electrons: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    custom_model_pkas: _T_NUMBER_DICTIONARY = field(default_factory=dict)

    # LIST_DICTIONARIES
    backbone_NH_hydrogen_bond: _T_LIST_DICTIONARY = field(default_factory=dict)
    backbone_CO_hydrogen_bond: _T_LIST_DICTIONARY = field(default_factory=dict)

    # STRING_DICTIONARIES
    protein_group_mapping: _T_STRING_DICTIONARY = field(default_factory=dict)

    # STRING_LISTS
    ignore_residues: _T_STRING_LIST = field(default_factory=list)
    angular_dependent_sidechain_interactions: _T_STRING_LIST = field(default_factory=list)
    acid_list: _T_STRING_LIST = field(default_factory=list)
    base_list: _T_STRING_LIST = field(default_factory=list)
    exclude_sidechain_interactions: _T_STRING_LIST = field(default_factory=list)
    backbone_reorganisation_list: _T_STRING_LIST = field(default_factory=list)
    write_out_order: _T_STRING_LIST = field(default_factory=list)

    # DISTANCES
    desolv_cutoff: float = 20.0
    buried_cutoff: float = 15.0
    coulomb_cutoff1: float = 4.0
    coulomb_cutoff2: float = 10.0

    # DISTANCES SQUARED
    # Unannotated descriptors: not dataclass fields. Assigning to one of them
    # stores the square root in the matching distance above.
    desolv_cutoff_squared = squared_property()
    buried_cutoff_squared = squared_property()
    coulomb_cutoff1_squared = squared_property()
    coulomb_cutoff2_squared = squared_property()

    # STRINGS
    version: _T_STRING = "VersionA"
    output_file_tag: _T_STRING = ""
    ligand_typing: _T_STRING = "groups"
    pH: _T_STRING = "variable"
    reference: _T_STRING = "neutral"

    # PARAMETERS
    Nmin: int = 280
    Nmax: int = 560
    desolvationSurfaceScalingFactor: float = 0.25
    desolvationPrefactor: float = -13.0
    desolvationAllowance: float = 0.0
    coulomb_diel: float = 80.0
    # TODO - it would be nice to rename these; they're defined everywhere
    COO_HIS_exception: float = 1.60
    OCO_HIS_exception: float = 1.60
    CYS_HIS_exception: float = 1.60
    CYS_CYS_exception: float = 3.60
    min_ligand_model_pka: float = -10.0
    max_ligand_model_pka: float = 20.0
    coupling_max_number_of_bonds: int = 3
    min_bond_distance_for_hydrogen_bonds: int = 4
    shared_determinants: _T_BOOL = False
    common_charge_centre: _T_BOOL = False
    remove_penalised_group: _T_BOOL = True
    max_intrinsic_pka_diff: float = 2.0
    min_interaction_energy: float = 0.5
    max_free_energy_diff: float = 1.0
    min_swap_pka_shift: float = 1.0
    min_pka: float = 0.0
    max_pka: float = 10.0
    sidechain_interaction: float = 0.85

    # Group labels shared by the three ``print_*`` reports below. They are
    # deliberately unannotated so that they are neither dataclass fields nor
    # visible to ``parse_line`` through ``__annotations__``.
    _ALL_GROUPS = (
        'COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP',
        'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1',
        'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH')
    _LIGAND_GROUPS = (
        'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH',
        'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH')
    # Ligand group -> groups it is compared against in
    # ``print_interaction_parameters``.
    _LIGAND_GROUP_MAP = {
        'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'],
        'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'],
        'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'],
        'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [],
        'NAM': ['AMD'], 'N1': [], 'O2': []}

    def parse_line(self, line):
        """Parse parameter file line.

        Text from the first ``#`` onwards is discarded and blank lines are
        ignored. The first word is the keyword; its annotation on this class
        selects the parser. Keywords without an annotation (for example the
        ``*_squared`` descriptors) and ``float`` fields are parsed as a single
        float.

        Args:
            line: one line of the configuration file
        """
        # first, remove comments, then split the line into words
        words = line.partition('#')[0].split()
        if not words:
            return

        # parse the words; annotations are compared by identity against the
        # module-level ``_T_*`` marker objects
        typeannotation = self.__annotations__.get(words[0])
        if typeannotation is _T_NUMBER_DICTIONARY:
            self.parse_to_number_dictionary(words)
        elif typeannotation is _T_STRING_LIST:
            self.parse_to_string_list(words)
        elif typeannotation is _T_STRING:
            self.parse_string(words)
        elif typeannotation is _T_LIST_DICTIONARY:
            self.parse_to_list_dictionary(words)
        elif typeannotation is _T_MATRIX or typeannotation is _T_PAIR_WISE_MATRIX:
            self.parse_to_matrix(words)
        elif typeannotation is _T_STRING_DICTIONARY:
            self.parse_to_string_dictionary(words)
        elif typeannotation is int or typeannotation is _T_BOOL:
            # booleans are converted with ``int`` as well
            self.parse_parameter(words, int)
        else:
            self.parse_parameter(words, float)

    def parse_to_number_dictionary(self, words):
        """Parse field to number dictionary.

        Args:
            words: ``[keyword, key, value]``; ``value`` is stored as float
        """
        assert len(words) == 3, words
        name, key, value = words
        getattr(self, name)[key] = float(value)

    def parse_to_string_dictionary(self, words):
        """Parse field to string dictionary.

        Args:
            words: ``[keyword, key, value]``; ``value`` is stored unchanged
        """
        assert len(words) == 3, words
        name, key, value = words
        getattr(self, name)[key] = value

    def parse_to_list_dictionary(self, words: List[str]):
        """Parse field to list dictionary.

        The values are converted to float and appended to the list stored
        under ``key``; the list is created on first use.

        Args:
            words: ``[keyword, key, value, ...]`` with at least one value
        """
        assert len(words) > 2, words
        dict_ = getattr(self, words[0])
        # The words are always strings, so the former ``isinstance(value,
        # list)`` branch could never run and has been dropped.
        dict_.setdefault(words[1], []).extend(
            float(value) for value in words[2:])

    def parse_to_string_list(self, words):
        """Parse field to string list.

        Args:
            words: ``[keyword, value]``; ``value`` is appended to the list
        """
        assert len(words) == 2, words
        getattr(self, words[0]).append(words[1])

    def parse_to_matrix(self, words):
        """Parse field to matrix.

        Args:
            words: ``[keyword, ...]``; the remaining words are passed as a
                tuple to the matrix object's ``add`` method
        """
        matrix = getattr(self, words[0])
        matrix.add(tuple(words[1:]))

    def parse_parameter(self, words, typefunc: Callable[[str], T]):
        """Parse field to parameters.

        Args:
            words: ``[keyword, value]``
            typefunc: converter applied to ``value`` before it is stored
        """
        assert len(words) == 2, words
        setattr(self, words[0], typefunc(words[1]))

    def parse_string(self, words):
        """Parse field to strings.

        Args:
            words: ``[keyword, value]``; ``value`` is stored unchanged
        """
        assert len(words) == 2, words
        setattr(self, words[0], words[1])

    def print_interaction_parameters(self):
        """Print interaction parameters.

        Logs the model pKa values, one row per (group, ligand group) pair
        with its interaction type and side-chain cutoffs, the exception
        values and the ligand-to-protein group mapping.
        """
        _LOGGER.info('--------------- Model pKa values ----------------------')
        for group, pka in self.model_pkas.items():
            _LOGGER.info('{0:>3s} {1:8.2f}'.format(group, pka))

        _LOGGER.info('')
        _LOGGER.info('--------------- Interactions --------------------------')
        row_fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}"
        map_ = self._LIGAND_GROUP_MAP
        for group1 in self._ALL_GROUPS:
            for group2 in self._LIGAND_GROUPS:
                mat = self.interaction_matrix[group1][group2]
                cutoffs = self.sidechain_cutoffs.get_value(group1, group2)
                interaction = row_fmt.format(
                    grp1=group1, grp2=group2, mat=mat,
                    val1=cutoffs[0], val2=cutoffs[1])
                map_interaction = ''
                if group2 in map_:
                    for val in map_[group2]:
                        mapped_mat = self.interaction_matrix[group1][val]
                        mapped_cutoffs = self.sidechain_cutoffs.get_value(
                            group1, val)
                        # The template uses named fields, so the values must
                        # be passed by keyword (positional arguments raised
                        # KeyError).
                        map_interaction += ("|" + row_fmt).format(
                            grp1=group1, grp2=val, mat=mapped_mat,
                            val1=mapped_cutoffs[0], val2=mapped_cutoffs[1])
                        # '*' flags a differing interaction type, '!' flags
                        # differing cutoffs.
                        if mapped_mat != mat:
                            map_interaction += '* '
                        if (mapped_cutoffs[0] != cutoffs[0]
                                or mapped_cutoffs[1] != cutoffs[1]):
                            map_interaction += '! '
                        else:
                            map_interaction += ' '
                    # '?' flags an unmapped ligand group whose cutoffs are
                    # not (3, 4).
                    if (len(map_[group2]) == 0
                            and (cutoffs[0] != 3 or cutoffs[1] != 4)):
                        map_interaction += '? '
                _LOGGER.info("%s %s", interaction, map_interaction)
                if group1 == group2:
                    break
            _LOGGER.info('-')

        _LOGGER.info('--------------- Exceptions ----------------------------')
        _LOGGER.info('COO-HIS %s', self.COO_HIS_exception)
        _LOGGER.info('OCO-HIS %s', self.OCO_HIS_exception)
        _LOGGER.info('CYS-HIS %s', self.CYS_HIS_exception)
        _LOGGER.info('CYS-CYS %s', self.CYS_CYS_exception)

        _LOGGER.info('--------------- Mapping -------------------------------')
        # NOTE(review): the line breaks of this table were reconstructed from
        # a whitespace-collapsed copy of the source - confirm the layout.
        _LOGGER.info("""
Titratable:
CG ARG
C2N ARG
N30 N+/LYS
N31 N+/LYS
N32 N+/LYS
N33 N+/LYS
NAR HIS
OCO COO
OP TYR/SER?
SH CYS

Non-titratable:
NP1 AMD?
OH ROH
O3 ?
CL
F
NAM
N1
O2
""")

    def _latex_table_header(self, column_spec: str) -> List[str]:
        """Return the shared LaTeX ``longtable`` preamble as a list of lines.

        Args:
            column_spec: column specification placed after
                ``\\begin{longtable}``
        """
        # Only the two templates passed through ``format`` use doubled
        # braces; every other line is emitted literally and therefore needs
        # single braces to be valid LaTeX.
        return [
            "",
            "\\begin{{longtable}}{{{0:s}}}".format(column_spec),
            ("\\caption{{Ligand interaction parameters. For interactions not "
             "listed, the default value of {0:s} is applied.}}").format(
                 str(self.sidechain_cutoffs.default)),
            "\\label{tab:ligand_interaction_parameters}\\\\",
            "\\toprule",
            "Group1 & Group2 & Interaction & c1 &c2 \\\\",
            "\\midrule",
            "\\endfirsthead",
            "",
            "\\multicolumn{5}{l}{\\emph{continued from the previous "
            "page}}\\\\",
            "\\toprule",
            "Group1 & Group2 & Interaction & c1 &c2 \\\\",
            "\\midrule",
            "\\endhead",
            "",
            "\\midrule",
            "\\multicolumn{5}{r}{\\emph{continued on the next "
            "page}}\\\\",
            "\\endfoot",
            "",
            "\\bottomrule",
            "\\endlastfoot",
            ""]

    def print_interaction_parameters_latex(self):
        """Print interaction parameters in LaTeX format.

        Logs a ``longtable`` with one row per (group, ligand group) pair,
        skipping pairs whose interaction type is ``'-'`` or whose cutoffs
        equal the default.
        """
        row_fmt = (
            "{grp1:>3s} & {grp2:>3s} & {mat:1s} & {val1:4} & "
            "{val2:4}\\\\ \n")
        rows = []
        for group1 in self._ALL_GROUPS:
            for group2 in self._LIGAND_GROUPS:
                mat = self.interaction_matrix[group1][group2]
                if mat == '-':
                    continue
                cutoffs = self.sidechain_cutoffs.get_value(group1, group2)
                if cutoffs == self.sidechain_cutoffs.default:
                    continue
                rows.append(row_fmt.format(
                    grp1=group1, grp2=group2, mat=mat,
                    val1=cutoffs[0], val2=cutoffs[1]))
                # NOTE(review): this diagonal stop is only reached for rows
                # that were not skipped above - confirm that is intended.
                if group1 == group2:
                    break
        str_ = (
            "\n".join(self._latex_table_header("lllll"))
            + "".join(rows)
            + ' \\end{longtable}\n')
        _LOGGER.info(str_)

    def print_interactions_latex(self):
        """Print interactions in LaTeX.

        Logs a ``longtable`` with one row per pair of groups, up to and
        including the diagonal.
        """
        agroups = self._ALL_GROUPS
        row_fmt = (
            '{g1:>3s} & {g2:>3s} & {mat:1s} & {val1:>4s} & '
            '{val2:>4s}\\\\ \n'
        )
        rows = []
        for group1 in agroups:
            for group2 in agroups:
                cutoffs = self.sidechain_cutoffs.get_value(group1, group2)
                rows.append(row_fmt.format(
                    g1=group1, g2=group2,
                    mat=self.interaction_matrix[group1][group2],
                    val1=str(cutoffs[0]), val2=str(cutoffs[1])))
                if group1 == group2:
                    break
        str_ = (
            "\n".join(self._latex_table_header('l' * len(agroups)))
            + "".join(rows)
            + ' \\end{longtable}\n')
        _LOGGER.info(str_)
class InteractionMatrix:
    """Interaction matrix class.

    A symmetric matrix built one row at a time: each ``add`` call introduces
    a new group together with its values against every group added so far
    (itself included).
    """

    def __init__(self, name: str):
        """Initialize with name of matrix.

        Args:
            name: name of interaction matrix
        """
        self.name = name
        self.ordered_keys: List[str] = []
        self.dictionary: Dict[str, Dict[str, Union[str, float]]] = {}

    def add(self, words: Sequence[str]):
        """Add values to matrix.

        Args:
            words: the new group name followed by one value per group added
                so far, the last value being the new group against itself.
                Values that parse as float are stored as float, all others
                as the original string.

        Raises:
            ValueError: if the number of words does not match the number of
                groups already present
        """
        len_expected = len(self.ordered_keys) + 2
        if len(words) != len_expected:
            raise ValueError(f"Expected {len_expected} arguments, got {words!r}")
        new_group = words[0]
        self.ordered_keys.append(new_group)
        new_row = self.dictionary.setdefault(new_group, {})
        # After the length check there is exactly one value per ordered key.
        for group, word in zip(self.ordered_keys, words[1:]):
            value: Union[str, float]
            try:
                value = float(word)
            except ValueError:
                value = word
            # store both directions to keep the matrix symmetric
            self.dictionary[group][new_group] = value
            new_row[group] = value

    def get_value(self, item1: str, item2: str) -> Union[str, float, None]:
        """Get specific matrix value.

        Args:
            item1: matrix row index
            item2: matrix column index
        Returns:
            matrix value or None
        """
        try:
            return self.dictionary[item1][item2]
        except KeyError:
            return None

    def __getitem__(self, group: str):
        """Get specific group from matrix.

        Args:
            group: group to get
        Raises:
            KeyError: if ``group`` has not been added to the matrix
        """
        if group not in self.dictionary:
            str_ = '{0:s} not found in interaction matrix {1:s}'.format(
                group, self.name)
            raise KeyError(str_)
        return self.dictionary[group]

    def keys(self):
        """Get keys from matrix.

        Returns:
            dictionary key list
        """
        return self.dictionary.keys()

    def __str__(self):
        """Return the matrix as text, one row per group in insertion order."""
        parts = [' ']
        parts.extend('{0:>3s} '.format(key) for key in self.ordered_keys)
        parts.append('\n')
        for key1 in self.ordered_keys:
            parts.append('{0:>3s} '.format(key1))
            row = self[key1]
            # ``!s`` converts first: ``add`` may have stored floats, which a
            # bare ``s`` format code rejects with ValueError.
            parts.extend(
                '{0!s:>3} '.format(row[key2]) for key2 in self.ordered_keys)
            parts.append('\n')
        return ''.join(parts)
class PairwiseMatrix:
    """Pairwise interaction matrix class.

    Maps an unordered pair of group names to a ``(float, float)`` value and
    falls back to ``default`` for pairs that were never set.
    """

    def __init__(self, name: str):
        """Initialize pairwise matrix.

        Args:
            name: name of pairwise interaction
        """
        self.name = name
        self.dictionary: Dict[str, Dict[str, Tuple[float, float]]] = {}
        self.default = (0.0, 0.0)

    def add(self, words: Sequence[str]):
        """Add information to the matrix.

        TODO - this function unnecessarily bundles arguments into a tuple

        Args:
            words: either ``('default', value1, value2)`` to set the default
                or ``(group1, group2, value1, value2)`` to set one pair; the
                pair is stored in both directions
        """
        # assign the default value
        if len(words) == 3 and words[0] == 'default':
            self.default = (float(words[1]), float(words[2]))
            return
        # assign non-default values
        assert len(words) == 4
        group1, group2 = words[0], words[1]
        value = (float(words[2]), float(words[3]))
        self.insert(group1, group2, value)
        self.insert(group2, group1, value)

    def insert(self, key1: str, key2: str, value: Tuple[float, float]):
        """Insert value into matrix.

        A warning is logged when an existing off-diagonal entry is
        overwritten; the new value is stored in every case.

        Args:
            key1: first matrix key (row)
            key2: second matrix key (column)
            value: value to insert
        """
        row = self.dictionary.setdefault(key1, {})
        # ``add`` inserts a diagonal pair twice, so only warn off-diagonal.
        if key2 in row and key1 != key2:
            str_ = (
                'Parameter value for {0:s}, {1:s} defined more '
                'than once'.format(key1, key2))
            _LOGGER.warning(str_)
        row[key2] = value

    def get_value(self, item1: str, item2: str) -> Tuple[float, float]:
        """Get specified value from matrix.

        Args:
            item1: row index
            item2: column index
        Returns:
            matrix value (or default)
        """
        try:
            return self.dictionary[item1][item2]
        except KeyError:
            return self.default

    def __getitem__(self, group: str):
        """Get item from matrix corresponding to specific group.

        Args:
            group: group to retrieve
        Returns:
            matrix information
        Raises:
            KeyError: if ``group`` has no entries in the matrix
        """
        if group not in self.dictionary:
            str_ = '{0:s} not found in interaction matrix {1:s}'.format(
                group, self.name)
            raise KeyError(str_)
        return self.dictionary[group]

    def keys(self):
        """Get keys from matrix.

        Returns:
            dictionary key list
        """
        return self.dictionary.keys()

    def __str__(self):
        """Return one ``key1 key2 value`` line per stored entry."""
        # ``!s`` converts the stored tuple first; a bare ``s`` format code on
        # a tuple raises TypeError.
        return ''.join(
            '{0:s} {1:s} {2!s}\n'.format(key1, key2, value)
            for key1, row in self.dictionary.items()
            for key2, value in row.items())