Source code for propka.parameters

"""
Configuration file parameters
=============================

Holds parameters and settings that can be set in :file:`propka.cfg`. The file
format consists of lines of  ``keyword value [value ...]``, blank lines, and
comment lines (introduced with ``#``).

The module attributes below list the names and types of all key words
in configuration file.

"""
import logging
from dataclasses import dataclass, field
from typing import Dict, List

try:
    # New in version 3.10, deprecated since version 3.12
    from typing import TypeAlias
except ImportError:
    TypeAlias = "TypeAlias"  # type: ignore


_LOGGER = logging.getLogger(__name__)


class squared_property:

    def __set_name__(self, owner, name: str):
        assert name.endswith("_squared")
        self._name_not_squared = name[:-len("_squared")]  # removesuffix()

    def __get__(self, instance, owner=None) -> float:
        if instance is None:
            return self  # type: ignore[return-value]
        return getattr(instance, self._name_not_squared)**2

    def __set__(self, instance, value: float):
        setattr(instance, self._name_not_squared, value**0.5)


_T_MATRIX: TypeAlias = "InteractionMatrix"
_T_PAIR_WISE_MATRIX: TypeAlias = "PairwiseMatrix"
_T_NUMBER_DICTIONARY = Dict[str, float]
_T_LIST_DICTIONARY = Dict[str, list]
_T_STRING_DICTIONARY = Dict[str, str]
_T_STRING_LIST = List[str]
_T_STRING = str
_T_BOOL = bool


[docs]@dataclass class Parameters: """PROPKA parameter class.""" # MATRICES interaction_matrix: _T_MATRIX = field( default_factory=lambda: InteractionMatrix("interaction_matrix")) # PAIR_WISE_MATRICES sidechain_cutoffs: _T_PAIR_WISE_MATRIX = field( default_factory=lambda: PairwiseMatrix("sidechain_cutoffs")) # NUMBER_DICTIONARIES VanDerWaalsVolume: _T_NUMBER_DICTIONARY = field(default_factory=dict) charge: _T_NUMBER_DICTIONARY = field(default_factory=dict) model_pkas: _T_NUMBER_DICTIONARY = field(default_factory=dict) ions: _T_NUMBER_DICTIONARY = field(default_factory=dict) valence_electrons: _T_NUMBER_DICTIONARY = field(default_factory=dict) custom_model_pkas: _T_NUMBER_DICTIONARY = field(default_factory=dict) # LIST_DICTIONARIES backbone_NH_hydrogen_bond: _T_LIST_DICTIONARY = field(default_factory=dict) backbone_CO_hydrogen_bond: _T_LIST_DICTIONARY = field(default_factory=dict) # STRING_DICTIONARIES protein_group_mapping: _T_STRING_DICTIONARY = field(default_factory=dict) # STRING_LISTS ignore_residues: _T_STRING_LIST = field(default_factory=list) angular_dependent_sidechain_interactions: _T_STRING_LIST = field(default_factory=list) acid_list: _T_STRING_LIST = field(default_factory=list) base_list: _T_STRING_LIST = field(default_factory=list) exclude_sidechain_interactions: _T_STRING_LIST = field(default_factory=list) backbone_reorganisation_list: _T_STRING_LIST = field(default_factory=list) write_out_order: _T_STRING_LIST = field(default_factory=list) # DISTANCES desolv_cutoff: float = 20.0 buried_cutoff: float = 15.0 coulomb_cutoff1: float = 4.0 coulomb_cutoff2: float = 10.0 # DISTANCES SQUARED desolv_cutoff_squared = squared_property() buried_cutoff_squared = squared_property() coulomb_cutoff1_squared = squared_property() coulomb_cutoff2_squared = squared_property() # STRINGS version: _T_STRING = "VersionA" output_file_tag: _T_STRING = "" ligand_typing: _T_STRING = "groups" pH: _T_STRING = "variable" reference: _T_STRING = "neutral" # PARAMETERS Nmin: int = 280 Nmax: int = 560 desolvationSurfaceScalingFactor: float = 0.25 desolvationPrefactor: float = -13.0 desolvationAllowance: float = 0.0 coulomb_diel: float = 80.0 # TODO - it would be nice to rename these; they're defined everywhere COO_HIS_exception: float = 1.60 OCO_HIS_exception: float = 1.60 CYS_HIS_exception: float = 1.60 CYS_CYS_exception: float = 3.60 min_ligand_model_pka: float = -10.0 max_ligand_model_pka: float = 20.0 # include_H_in_interactions: NoReturn = None coupling_max_number_of_bonds: int = 3 min_bond_distance_for_hydrogen_bonds: int = 4 # coupling_penalty: NoReturn = None shared_determinants: _T_BOOL = False common_charge_centre: _T_BOOL = False # hide_penalised_group: NoReturn = None remove_penalised_group: _T_BOOL = True max_intrinsic_pka_diff: float = 2.0 min_interaction_energy: float = 0.5 max_free_energy_diff: float = 1.0 min_swap_pka_shift: float = 1.0 min_pka: float = 0.0 max_pka: float = 10.0 sidechain_interaction: float = 0.85
[docs] def parse_line(self, line): """Parse parameter file line.""" # first, remove comments comment_pos = line.find('#') if comment_pos != -1: line = line[:comment_pos] # split the line into words words = line.split() if len(words) == 0: return # parse the words typeannotation = self.__annotations__.get(words[0]) if typeannotation is _T_NUMBER_DICTIONARY: self.parse_to_number_dictionary(words) elif typeannotation is _T_STRING_LIST: self.parse_to_string_list(words) elif typeannotation is _T_STRING: self.parse_string(words) elif typeannotation is _T_LIST_DICTIONARY: self.parse_to_list_dictionary(words) elif typeannotation is _T_MATRIX or typeannotation is _T_PAIR_WISE_MATRIX: self.parse_to_matrix(words) elif typeannotation is _T_STRING_DICTIONARY: self.parse_to_string_dictionary(words) else: self.parse_parameter(words)
[docs] def parse_to_number_dictionary(self, words): """Parse field to number dictionary. Args: words: strings to parse. """ assert len(words) == 3, words dict_ = getattr(self, words[0]) key = words[1] value = words[2] dict_[key] = float(value)
[docs] def parse_to_string_dictionary(self, words): """Parse field to string dictionary. Args: words: strings to parse """ assert len(words) == 3, words dict_ = getattr(self, words[0]) key = words[1] value = words[2] dict_[key] = value
[docs] def parse_to_list_dictionary(self, words: List[str]): """Parse field to list dictionary. Args: words: strings to parse. """ assert len(words) > 2, words dict_ = getattr(self, words[0]) key = words[1] if key not in dict_: dict_[key] = [] for value in words[2:]: if isinstance(value, list): dict_[key].append([float(x) for x in value]) dict_[key].append(float(value))
[docs] def parse_to_string_list(self, words): """Parse field to string list. Args: words: strings to parse """ assert len(words) == 2, words list_ = getattr(self, words[0]) value = words[1] list_.append(value)
[docs] def parse_to_matrix(self, words): """Parse field to matrix. Args: words: strings to parse """ matrix = getattr(self, words[0]) value = tuple(words[1:]) matrix.add(value)
[docs] def parse_parameter(self, words): """Parse field to parameters. Args: words: strings to parse """ assert len(words) == 2, words value = float(words[1]) setattr(self, words[0], value)
[docs] def parse_string(self, words): """Parse field to strings. Args: words: strings to parse """ assert len(words) == 2, words setattr(self, words[0], words[1])
[docs] def print_interaction_parameters(self): """Print interaction parameters.""" _LOGGER.info('--------------- Model pKa values ----------------------') for k in self.model_pkas: _LOGGER.info('{0:>3s} {1:8.2f}'.format(k, self.model_pkas[k])) _LOGGER.info('') _LOGGER.info('--------------- Interactions --------------------------') agroups = [ 'COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] lgroups = [ 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] map_ = { 'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'], 'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'], 'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'], 'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [], 'NAM': ['AMD'], 'N1': [], 'O2': []} for group1 in agroups: for group2 in lgroups: fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}" interaction = fmt.format( grp1=group1, grp2=group2, mat=self.interaction_matrix[group1][group2], val1=self.sidechain_cutoffs.get_value(group1, group2)[0], val2=self.sidechain_cutoffs.get_value(group1, group2)[1]) map_interaction = '' if group2 in map_: for val in map_[group2]: fmt = ( "|{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}" ) map_interaction += fmt.format( group1, val, self.interaction_matrix[group1][val], self.sidechain_cutoffs.get_value(group1, val)[0], self.sidechain_cutoffs.get_value(group1, val)[1]) if (self.interaction_matrix[group1][val] != self.interaction_matrix[group1][group2]): map_interaction += '* ' if (self.sidechain_cutoffs.get_value(group1, val)[0] != self.sidechain_cutoffs.get_value( group1, group2)[0] or self.sidechain_cutoffs.get_value( group1, val)[1] != self.sidechain_cutoffs.get_value( group1, group2)[1]): map_interaction += '! ' else: map_interaction += ' ' if (len(map_[group2]) == 0 and (self.sidechain_cutoffs.get_value( group1, group2)[0] != 3 or self.sidechain_cutoffs.get_value( group1, group2)[1] != 4)): map_interaction += '? ' _LOGGER.info("%s %s", interaction, map_interaction) if group1 == group2: break _LOGGER.info('-') _LOGGER.info('--------------- Exceptions ----------------------------') _LOGGER.info('COO-HIS %s', self.COO_HIS_exception) _LOGGER.info('OCO-HIS %s', self.OCO_HIS_exception) _LOGGER.info('CYS-HIS %s', self.CYS_HIS_exception) _LOGGER.info('CYS-CYS %s', self.CYS_CYS_exception) _LOGGER.info('--------------- Mapping -------------------------------') _LOGGER.info(""" Titratable: CG ARG C2N ARG N30 N+/LYS N31 N+/LYS N32 N+/LYS N33 N+/LYS NAR HIS OCO COO OP TYR/SER? SH CYS Non-titratable: NP1 AMD? OH ROH O3 ? CL F NAM N1 O2 """)
[docs] def print_interaction_parameters_latex(self): """Print interaction parameters in LaTeX format.""" # TODO - if these lists and dictionaries are the same as above, then # should be constants at the level of the module agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] lines = [ "", "\\begin{{longtable}}{{lllll}}", ("\\caption{{Ligand interaction parameters. For interactions not " "listed, the default value of {0:s} is applied.}}").format( self.sidechain_cutoffs.default), "\\label{{tab:ligand_interaction_parameters}}\\\\", "\\toprule", "Group1 & Group2 & Interaction & c1 &c2 \\\\", "\\midrule", "\\endfirsthead", "", "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous " "page}}}\\\\", "\\toprule", "Group1 & Group2 & Interaction & c1 &c2 \\\\", "\\midrule", "\\endhead", "", "\\midrule", "\\multicolumn{{5}}{{r}}{\\emph{{continued on the next " "page}}}\\\\", "\\endfoot", "", "\\bottomrule", "\\endlastfoot", ""] str_ = "\n".join(lines) for group1 in agroups: for group2 in lgroups: if self.interaction_matrix[group1][group2] == '-': continue if (self.sidechain_cutoffs.get_value(group1, group2) == self.sidechain_cutoffs.default): continue fmt = ( "{grp1:>3s} & {grp2:>3s} & {mat:1s} & {val1:4} & " "{val2:4}\\\\ \n") str_ += fmt.format( group1, group2, self.interaction_matrix[group1][group2], self.sidechain_cutoffs.get_value(group1, group2)[0], self.sidechain_cutoffs.get_value(group1, group2)[1]) if group1 == group2: break str_ += ' \\end{{longtable}}\n' _LOGGER.info(str_)
[docs] def print_interactions_latex(self): """Print interactions in LaTeX.""" # TODO - are these the same lists as above? Convert to module # constants. agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] lines = [ "", "\\begin{{longtable}}{{{0:s}}}".format('l'*len(agroups)), ("\\caption{{Ligand interaction parameters. For interactions not " "listed, the default value of {0:s} is applied.}}").format( str(self.sidechain_cutoffs.default)), "\\label{{tab:ligand_interaction_parameters}}\\\\", "\\toprule", "Group1 & Group2 & Interaction & c1 &c2 \\\\", "\\midrule", "\\endfirsthead", "", "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous " "page}}}\\\\", "\\toprule", "Group1 & Group2 & Interaction & c1 &c2 \\\\", "\\midrule", "\\endhead", "", "\\midrule", "\\multicolumn{{5}}{{r}}{\\emph{{continued on the next " "page}}}\\\\", "\\endfoot", "", "\\bottomrule", "\\endlastfoot", "" ] str_ = "\n".join(lines) for group1 in agroups: for group2 in agroups: fmt = ( '{g1:>3s} & {g2:>3s} & {mat:1s} & {val1:>4s} & ' '{val2:>4s}\\\\ \n' ) str_ += fmt.format( group1, group2, self.interaction_matrix[group1][group2], str(self.sidechain_cutoffs.get_value(group1, group2)[0]), str(self.sidechain_cutoffs.get_value(group1, group2)[1])) if group1 == group2: break str_ += ' \\end{{longtable}}\n' _LOGGER.info(str_)
[docs]class InteractionMatrix: """Interaction matrix class.""" def __init__(self, name): """Initialize with name of matrix. Args: name: name of interaction matrix """ self.name = name self.value = None self.ordered_keys = [] self.dictionary = {}
[docs] def add(self, words): """Add values to matrix. Args: words: values to add """ new_group = words[0] self.ordered_keys.append(new_group) if new_group not in self.dictionary.keys(): self.dictionary[new_group] = {} for i, group in enumerate(self.ordered_keys): if len(words) > i+1: try: self.value = float(words[i+1]) except ValueError: self.value = words[i+1] self.dictionary[group][new_group] = self.value self.dictionary[new_group][group] = self.value
[docs] def get_value(self, item1, item2): """Get specific matrix value. Args: item1: matrix row index item2: matrix column index Returns: matrix value or None """ try: return self.dictionary[item1][item2] except KeyError: return None
def __getitem__(self, group): """Get specific group from matrix. Args: group: group to get """ if group not in self.dictionary.keys(): str_ = '{0:s} not found in interaction matrix {1:s}'.format( group, self.name) raise KeyError(str_) return self.dictionary[group]
[docs] def keys(self): """Get keys from matrix. Returns: dictionary key list """ return self.dictionary.keys()
def __str__(self): str_ = ' ' for key in self.ordered_keys: str_ += '{0:>3s} '.format(key) str_ += '\n' for key1 in self.ordered_keys: str_ += '{0:>3s} '.format(key1) for key2 in self.ordered_keys: str_ += '{0:>3s} '.format(self[key1][key2]) str_ += '\n' return str_
[docs]class PairwiseMatrix: """Pairwise interaction matrix class.""" def __init__(self, name): """Initialize pairwise matrix. Args: name: name of pairwise interaction """ self.name = name self.dictionary = {} self.default = [0.0, 0.0]
[docs] def add(self, words): """Add information to the matrix. TODO - this function unnecessarily bundles arguments into a tuple Args: words: tuple with assignment information and value """ # assign the default value if len(words) == 3 and words[0] == 'default': self.default = [float(words[1]), float(words[2])] return # assign non-default values group1 = words[0] group2 = words[1] value = [float(words[2]), float(words[3])] self.insert(group1, group2, value) self.insert(group2, group1, value)
[docs] def insert(self, key1, key2, value): """Insert value into matrix. Args: key1: first matrix key (row) key2: second matrix key (column) value: value to insert """ if key1 in self.dictionary and key2 in self.dictionary[key1]: if key1 != key2: str_ = ( 'Parameter value for {0:s}, {1:s} defined more ' 'than once'.format(key1, key2)) _LOGGER.warning(str_) if key1 not in self.dictionary: self.dictionary[key1] = {} self.dictionary[key1][key2] = value
[docs] def get_value(self, item1, item2): """Get specified value from matrix. Args: item1: row index item2: column index Returns: matrix value (or default) """ try: return self.dictionary[item1][item2] except KeyError: return self.default
def __getitem__(self, group): """Get item from matrix corresponding to specific group. Args: group: group to retrieve Returns: matrix information """ if group not in self.dictionary.keys(): str_ = '{0:s} not found in interaction matrix {1:s}'.format( group, self.name) raise KeyError(str_) return self.dictionary[group]
[docs] def keys(self): """Get keys from matrix. Returns: dictionary key list """ return self.dictionary.keys()
def __str__(self): str_ = '' for key1 in self.keys(): for key2 in self[key1].keys(): str_ += '{0:s} {1:s} {2:s}\n'.format( key1, key2, self[key1][key2]) return str_