"""
Configuration file parameters
=============================
Holds parameters and settings that can be set in :file:`propka.cfg`. The file
format consists of lines of ``keyword value [value ...]``, blank lines, and
comment lines (introduced with ``#``).
The module attributes below list the names and types of all keywords
in the configuration file.
"""
import logging
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Sequence, Tuple, TypeVar, Union
try:
# New in version 3.10, deprecated since version 3.12
from typing import TypeAlias
except ImportError:
TypeAlias = "TypeAlias" # type: ignore
_LOGGER = logging.getLogger(__name__)
class squared_property:
    """Descriptor that exposes ``<name>_squared`` as the square of ``<name>``.

    Reading the attribute returns the owning instance's ``<name>`` attribute
    squared; assigning to it stores the square root of the assigned value in
    ``<name>``.
    """

    def __set_name__(self, owner, name: str):
        suffix = "_squared"
        assert name.endswith(suffix)
        # Strip the suffix to obtain the name of the backing attribute.
        self._name_not_squared = name[:-len(suffix)]  # removesuffix()

    def __get__(self, instance, owner=None) -> float:
        if instance is not None:
            base_value = getattr(instance, self._name_not_squared)
            return base_value**2
        # Accessed on the class itself: hand back the descriptor object.
        return self  # type: ignore[return-value]

    def __set__(self, instance, value: float):
        root = value**0.5
        setattr(instance, self._name_not_squared, root)
# Generic return type of the converter callable passed to
# ``Parameters.parse_parameter``.
T = TypeVar("T")
# The names below are used as field annotations on ``Parameters``.
# ``Parameters.parse_line`` compares a field's annotation against these
# objects with ``is`` to choose a parser, so each one must stay a single
# shared object.
# The two matrix markers are plain strings holding a class name.
_T_MATRIX: TypeAlias = "InteractionMatrix"
_T_PAIR_WISE_MATRIX: TypeAlias = "PairwiseMatrix"
# Mapping of name -> number (handled by ``parse_to_number_dictionary``).
_T_NUMBER_DICTIONARY = Dict[str, float]
# Mapping of name -> list (handled by ``parse_to_list_dictionary``).
_T_LIST_DICTIONARY = Dict[str, list]
# Mapping of name -> string (handled by ``parse_to_string_dictionary``).
_T_STRING_DICTIONARY = Dict[str, str]
# List of strings (handled by ``parse_to_string_list``).
_T_STRING_LIST = List[str]
# Single string value (handled by ``parse_string``).
_T_STRING = str
# Boolean flag; ``parse_line`` converts its value with ``int``.
_T_BOOL = bool
[docs]
@dataclass
class Parameters:
    """PROPKA parameter class.

    Every annotated attribute is a keyword that may appear in the
    configuration file.  :meth:`parse_line` looks up the annotation of the
    keyword found at the start of a line and uses it to decide how the
    remaining words on that line are converted and stored.
    """

    # MATRICES
    interaction_matrix: _T_MATRIX = field(
        default_factory=lambda: InteractionMatrix("interaction_matrix"))

    # PAIR_WISE_MATRICES
    sidechain_cutoffs: _T_PAIR_WISE_MATRIX = field(
        default_factory=lambda: PairwiseMatrix("sidechain_cutoffs"))

    # NUMBER_DICTIONARIES
    VanDerWaalsVolume: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    charge: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    model_pkas: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    ions: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    valence_electrons: _T_NUMBER_DICTIONARY = field(default_factory=dict)
    custom_model_pkas: _T_NUMBER_DICTIONARY = field(default_factory=dict)

    # LIST_DICTIONARIES
    backbone_NH_hydrogen_bond: _T_LIST_DICTIONARY = field(default_factory=dict)
    backbone_CO_hydrogen_bond: _T_LIST_DICTIONARY = field(default_factory=dict)

    # STRING_DICTIONARIES
    protein_group_mapping: _T_STRING_DICTIONARY = field(default_factory=dict)

    # STRING_LISTS
    ignore_residues: _T_STRING_LIST = field(default_factory=list)
    angular_dependent_sidechain_interactions: _T_STRING_LIST = field(default_factory=list)
    acid_list: _T_STRING_LIST = field(default_factory=list)
    base_list: _T_STRING_LIST = field(default_factory=list)
    exclude_sidechain_interactions: _T_STRING_LIST = field(default_factory=list)
    backbone_reorganisation_list: _T_STRING_LIST = field(default_factory=list)
    write_out_order: _T_STRING_LIST = field(default_factory=list)

    # DISTANCES
    desolv_cutoff: float = 20.0
    buried_cutoff: float = 15.0
    coulomb_cutoff1: float = 4.0
    coulomb_cutoff2: float = 10.0

    # DISTANCES SQUARED
    # Un-annotated descriptors: not dataclass fields.  Reading gives the
    # square of the distance above; writing stores the square root in it.
    desolv_cutoff_squared = squared_property()
    buried_cutoff_squared = squared_property()
    coulomb_cutoff1_squared = squared_property()
    coulomb_cutoff2_squared = squared_property()

    # STRINGS
    version: _T_STRING = "VersionA"
    output_file_tag: _T_STRING = ""
    ligand_typing: _T_STRING = "groups"
    pH: _T_STRING = "variable"
    reference: _T_STRING = "neutral"

    # PARAMETERS
    Nmin: int = 280
    Nmax: int = 560
    desolvationSurfaceScalingFactor: float = 0.25
    desolvationPrefactor: float = -13.0
    desolvationAllowance: float = 0.0
    coulomb_diel: float = 80.0
    # TODO - it would be nice to rename these; they're defined everywhere
    COO_HIS_exception: float = 1.60
    OCO_HIS_exception: float = 1.60
    CYS_HIS_exception: float = 1.60
    CYS_CYS_exception: float = 3.60
    min_ligand_model_pka: float = -10.0
    max_ligand_model_pka: float = 20.0
    coupling_max_number_of_bonds: int = 3
    min_bond_distance_for_hydrogen_bonds: int = 4
    shared_determinants: _T_BOOL = False
    common_charge_centre: _T_BOOL = False
    remove_penalised_group: _T_BOOL = True
    max_intrinsic_pka_diff: float = 2.0
    min_interaction_energy: float = 0.5
    max_free_energy_diff: float = 1.0
    min_swap_pka_shift: float = 1.0
    min_pka: float = 0.0
    max_pka: float = 10.0
    sidechain_interaction: float = 0.85

    # Group names iterated by the ``print_*`` reporting methods.  They are
    # intentionally left un-annotated so that ``dataclass`` does not turn
    # them into fields.
    _AGROUPS = (
        'COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG',
        'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR',
        'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH')
    _LGROUPS = (
        'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1',
        'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH')

    def parse_line(self, line: str) -> None:
        """Parse parameter file line.

        Everything from the first ``#`` onwards is dropped as a comment and
        the rest is split on whitespace.  Empty lines are ignored.  The first
        word is the keyword; its annotation on this class selects the parser.
        Keywords without an annotation (for example the ``*_squared``
        descriptors) are parsed as a single float.

        Args:
            line: one line of the configuration file.
        """
        words = line.partition('#')[0].split()
        if not words:
            return
        # The annotation objects are compared by identity with the
        # module-level markers they were declared with.
        typeannotation = self.__annotations__.get(words[0])
        if typeannotation is _T_NUMBER_DICTIONARY:
            self.parse_to_number_dictionary(words)
        elif typeannotation is _T_STRING_LIST:
            self.parse_to_string_list(words)
        elif typeannotation is _T_STRING:
            self.parse_string(words)
        elif typeannotation is _T_LIST_DICTIONARY:
            self.parse_to_list_dictionary(words)
        elif typeannotation is _T_MATRIX or typeannotation is _T_PAIR_WISE_MATRIX:
            self.parse_to_matrix(words)
        elif typeannotation is _T_STRING_DICTIONARY:
            self.parse_to_string_dictionary(words)
        elif typeannotation is int or typeannotation is _T_BOOL:
            self.parse_parameter(words, int)
        else:
            self.parse_parameter(words, float)

    def parse_to_number_dictionary(self, words):
        """Parse field to number dictionary.

        Args:
            words: ``[attribute name, key, value]``; the value is stored as
                a float under ``key`` in the named dictionary attribute.
        """
        assert len(words) == 3, words
        name, key, value = words
        getattr(self, name)[key] = float(value)

    def parse_to_string_dictionary(self, words):
        """Parse field to string dictionary.

        Args:
            words: ``[attribute name, key, value]``; the value is stored
                unchanged under ``key`` in the named dictionary attribute.
        """
        assert len(words) == 3, words
        name, key, value = words
        getattr(self, name)[key] = value

    def parse_to_list_dictionary(self, words: List[str]):
        """Parse field to list dictionary.

        Args:
            words: ``[attribute name, key, value, ...]``; every value is
                converted to float and appended to the list stored under
                ``key`` (the list is created on first use).
        """
        assert len(words) > 2, words
        values = getattr(self, words[0]).setdefault(words[1], [])
        for value in words[2:]:
            if isinstance(value, list):
                # Nested list of numbers: convert element-wise.  Without
                # the ``else`` this case fell through to ``float(list)``.
                values.append([float(x) for x in value])
            else:
                values.append(float(value))

    def parse_to_string_list(self, words):
        """Parse field to string list.

        Args:
            words: ``[attribute name, value]``; the value is appended to the
                named list attribute.
        """
        assert len(words) == 2, words
        name, value = words
        getattr(self, name).append(value)

    def parse_to_matrix(self, words):
        """Parse field to matrix.

        Args:
            words: ``[attribute name, value, ...]``; the values are passed
                as one tuple to the ``add`` method of the named matrix.
        """
        matrix = getattr(self, words[0])
        matrix.add(tuple(words[1:]))

    def parse_parameter(self, words, typefunc: Callable[[str], T]):
        """Parse field to parameters.

        Args:
            words: ``[attribute name, value]``.
            typefunc: converter applied to the value before it is assigned
                to the named attribute.
        """
        assert len(words) == 2, words
        name, raw_value = words
        setattr(self, name, typefunc(raw_value))

    def parse_string(self, words):
        """Parse field to strings.

        Args:
            words: ``[attribute name, value]``; the value is assigned
                unchanged to the named attribute.
        """
        assert len(words) == 2, words
        name, value = words
        setattr(self, name, value)

    def print_interaction_parameters(self):
        """Print interaction parameters.

        Logs the model pKa values, one line per (group1, group2) pair with
        its interaction type and cutoffs, the exception values and a static
        mapping table.
        """
        _LOGGER.info('--------------- Model pKa values ----------------------')
        for name, pka in self.model_pkas.items():
            _LOGGER.info(f'{name:>3s} {pka:8.2f}')
        _LOGGER.info('')
        _LOGGER.info('--------------- Interactions --------------------------')
        map_ = {
            'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'],
            'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'],
            'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'],
            'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [],
            'NAM': ['AMD'], 'N1': [], 'O2': []}
        matrix = self.interaction_matrix
        cutoffs = self.sidechain_cutoffs
        row_fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}"
        for group1 in self._AGROUPS:
            for group2 in self._LGROUPS:
                kind = matrix[group1][group2]
                cutoff = cutoffs.get_value(group1, group2)
                # ``str(kind)``: matrix entries may be stored as floats,
                # which the ``s`` format code rejects.
                interaction = row_fmt.format(
                    grp1=group1, grp2=group2, mat=str(kind),
                    val1=cutoff[0], val2=cutoff[1])
                map_interaction = ''
                if group2 in map_:
                    mapped_groups = map_[group2]
                    for val in mapped_groups:
                        mapped_kind = matrix[group1][val]
                        mapped_cutoff = cutoffs.get_value(group1, val)
                        # The format fields are named, so the values must be
                        # passed by keyword (positional arguments raised
                        # KeyError here).
                        map_interaction += ("|" + row_fmt).format(
                            grp1=group1, grp2=val, mat=str(mapped_kind),
                            val1=mapped_cutoff[0], val2=mapped_cutoff[1])
                        # '*' flags a differing interaction type, '!' a
                        # differing cutoff pair.
                        if mapped_kind != kind:
                            map_interaction += '* '
                        if (mapped_cutoff[0] != cutoff[0]
                                or mapped_cutoff[1] != cutoff[1]):
                            map_interaction += '! '
                        else:
                            map_interaction += ' '
                    # '?' flags an unmapped group whose cutoffs are not 3/4.
                    if not mapped_groups and (cutoff[0] != 3
                                              or cutoff[1] != 4):
                        map_interaction += '? '
                _LOGGER.info("%s %s", interaction, map_interaction)
                if group1 == group2:
                    break
            _LOGGER.info('-')
        _LOGGER.info('--------------- Exceptions ----------------------------')
        _LOGGER.info('COO-HIS %s', self.COO_HIS_exception)
        _LOGGER.info('OCO-HIS %s', self.OCO_HIS_exception)
        _LOGGER.info('CYS-HIS %s', self.CYS_HIS_exception)
        _LOGGER.info('CYS-CYS %s', self.CYS_CYS_exception)
        _LOGGER.info('--------------- Mapping -------------------------------')
        _LOGGER.info("""
Titratable:
CG ARG
C2N ARG
N30 N+/LYS
N31 N+/LYS
N32 N+/LYS
N33 N+/LYS
NAR HIS
OCO COO
OP TYR/SER?
SH CYS
Non-titratable:
NP1 AMD?
OH ROH
O3 ?
CL
F
NAM
N1
O2
""")

    def _latex_longtable_preamble(self, column_spec: str) -> str:
        """Build the longtable preamble shared by the LaTeX printers.

        The lines are written with literal single braces; nothing in here
        is passed through ``str.format``.

        Args:
            column_spec: LaTeX column specification, e.g. ``"lllll"``.

        Returns:
            the preamble text, ending with a newline.
        """
        # ``default`` is a tuple, so it is converted with ``str`` (the ``s``
        # format code does not accept tuples).
        default = str(self.sidechain_cutoffs.default)
        header_row = "Group1 & Group2 & Interaction & c1 &c2 \\\\"
        lines = [
            "",
            "\\begin{longtable}{" + column_spec + "}",
            "\\caption{Ligand interaction parameters. For interactions not "
            "listed, the default value of " + default + " is applied.}",
            "\\label{tab:ligand_interaction_parameters}\\\\",
            "\\toprule",
            header_row,
            "\\midrule",
            "\\endfirsthead",
            "",
            "\\multicolumn{5}{l}{\\emph{continued from the previous "
            "page}}\\\\",
            "\\toprule",
            header_row,
            "\\midrule",
            "\\endhead",
            "",
            "\\midrule",
            "\\multicolumn{5}{r}{\\emph{continued on the next "
            "page}}\\\\",
            "\\endfoot",
            "",
            "\\bottomrule",
            "\\endlastfoot",
            "",
        ]
        return "\n".join(lines)

    def print_interaction_parameters_latex(self):
        """Print interaction parameters in LaTeX format.

        Logs one longtable containing a row for every (group1, group2) pair
        whose interaction type is not ``'-'`` and whose cutoffs differ from
        the default.
        """
        matrix = self.interaction_matrix
        cutoffs = self.sidechain_cutoffs
        row_fmt = (
            "{grp1:>3s} & {grp2:>3s} & {mat:1s} & {val1:4} & "
            "{val2:4}\\\\ \n")
        parts = [self._latex_longtable_preamble("lllll")]
        for group1 in self._AGROUPS:
            for group2 in self._LGROUPS:
                kind = matrix[group1][group2]
                if kind == '-':
                    continue
                cutoff = cutoffs.get_value(group1, group2)
                if cutoff == cutoffs.default:
                    continue
                parts.append(row_fmt.format(
                    grp1=group1, grp2=group2, mat=str(kind),
                    val1=cutoff[0], val2=cutoff[1]))
                # NOTE(review): skipped pairs ``continue`` past this check,
                # so the inner loop only stops at a *printed* diagonal
                # entry - kept as in the original; confirm this is intended.
                if group1 == group2:
                    break
        parts.append(' \\end{longtable}\n')
        _LOGGER.info("".join(parts))

    def print_interactions_latex(self):
        """Print interactions in LaTeX.

        Logs one longtable with a row for every pair of groups up to and
        including the diagonal entry of each row.
        """
        matrix = self.interaction_matrix
        cutoffs = self.sidechain_cutoffs
        row_fmt = (
            '{g1:>3s} & {g2:>3s} & {mat:1s} & {val1:>4s} & '
            '{val2:>4s}\\\\ \n'
        )
        parts = [self._latex_longtable_preamble('l' * len(self._AGROUPS))]
        for group1 in self._AGROUPS:
            for group2 in self._AGROUPS:
                kind = matrix[group1][group2]
                cutoff = cutoffs.get_value(group1, group2)
                parts.append(row_fmt.format(
                    g1=group1, g2=group2, mat=str(kind),
                    val1=str(cutoff[0]), val2=str(cutoff[1])))
                if group1 == group2:
                    break
        parts.append(' \\end{longtable}\n')
        _LOGGER.info("".join(parts))
[docs]
class InteractionMatrix:
    """Interaction matrix class.

    Symmetric matrix built row by row: each call to :meth:`add` introduces
    one new group together with its values against every group added so far
    (including itself).
    """

    def __init__(self, name: str):
        """Initialize with name of matrix.

        Args:
            name: name of interaction matrix
        """
        self.name = name
        # Group names in the order they were added.
        self.ordered_keys: List[str] = []
        # dictionary[group_a][group_b] -> value (stored in both directions).
        self.dictionary: Dict[str, Dict[str, Union[str, float]]] = {}

    def add(self, words: Sequence[str]):
        """Add values to matrix.

        Args:
            words: the new group name followed by one value for every
                group already in the matrix and one for the new group
                itself.  Values that parse as numbers are stored as float,
                all others as the original string.

        Raises:
            ValueError: if the number of words does not match the current
                matrix size.
        """
        len_expected = len(self.ordered_keys) + 2
        if len(words) != len_expected:
            raise ValueError(f"Expected {len_expected} arguments, got {words!r}")
        new_group = words[0]
        self.ordered_keys.append(new_group)
        new_row = self.dictionary.setdefault(new_group, {})
        # The length check above guarantees one word per ordered key.
        for group, word in zip(self.ordered_keys, words[1:]):
            value: Union[str, float]
            try:
                value = float(word)
            except ValueError:
                value = word
            self.dictionary[group][new_group] = value
            new_row[group] = value

    def get_value(self, item1: str, item2: str) -> Union[str, float, None]:
        """Get specific matrix value.

        Args:
            item1: matrix row index
            item2: matrix column index
        Returns:
            matrix value or None
        """
        try:
            return self.dictionary[item1][item2]
        except KeyError:
            return None

    def __getitem__(self, group: str):
        """Get specific group from matrix.

        Args:
            group: group to get

        Raises:
            KeyError: if the group is not in the matrix.
        """
        if group not in self.dictionary:
            raise KeyError(
                f'{group} not found in interaction matrix {self.name}')
        return self.dictionary[group]

    def keys(self):
        """Get keys from matrix.

        Returns:
            dictionary key list
        """
        return self.dictionary.keys()

    def __str__(self):
        header = ' ' + ''.join(
            '{0:>3s} '.format(key) for key in self.ordered_keys)
        lines = [header]
        for key1 in self.ordered_keys:
            row = self[key1]
            # ``str(...)``: values may be floats, which the ``s`` format
            # code does not accept.
            cells = ''.join(
                '{0:>3s} '.format(str(row[key2]))
                for key2 in self.ordered_keys)
            lines.append('{0:>3s} '.format(key1) + cells)
        return '\n'.join(lines) + '\n'
[docs]
class PairwiseMatrix:
    """Pairwise interaction matrix class.

    Stores a pair of floats for each (group1, group2) combination,
    symmetrically, and falls back to :attr:`default` for unknown pairs.
    """

    def __init__(self, name: str):
        """Initialize pairwise matrix.

        Args:
            name: name of pairwise interaction
        """
        self.name = name
        self.dictionary: Dict[str, Dict[str, Tuple[float, float]]] = {}
        # Value returned by ``get_value`` for pairs that were never added.
        self.default = (0.0, 0.0)

    def add(self, words: Sequence[str]):
        """Add information to the matrix.

        TODO - this function unnecessarily bundles arguments into a tuple

        Args:
            words: either ``("default", value1, value2)`` to set the
                default, or ``(group1, group2, value1, value2)`` to set the
                value for one pair (stored in both directions).
        """
        # assign the default value
        if len(words) == 3 and words[0] == 'default':
            self.default = (float(words[1]), float(words[2]))
            return
        # assign non-default values
        assert len(words) == 4, words
        group1, group2 = words[0], words[1]
        value = (float(words[2]), float(words[3]))
        self.insert(group1, group2, value)
        self.insert(group2, group1, value)

    def insert(self, key1: str, key2: str, value: Tuple[float, float]):
        """Insert value into matrix.

        A warning is logged when an existing off-diagonal entry is
        overwritten.

        Args:
            key1: first matrix key (row)
            key2: second matrix key (column)
            value: value to insert
        """
        row = self.dictionary.setdefault(key1, {})
        # ``add`` inserts a diagonal pair twice, so that case stays silent.
        if key2 in row and key1 != key2:
            _LOGGER.warning(
                'Parameter value for %s, %s defined more than once',
                key1, key2)
        row[key2] = value

    def get_value(self, item1: str, item2: str) -> Tuple[float, float]:
        """Get specified value from matrix.

        Args:
            item1: row index
            item2: column index
        Returns:
            matrix value (or default)
        """
        try:
            return self.dictionary[item1][item2]
        except KeyError:
            return self.default

    def __getitem__(self, group: str):
        """Get item from matrix corresponding to specific group.

        Args:
            group: group to retrieve
        Returns:
            matrix information
        Raises:
            KeyError: if the group is not in the matrix.
        """
        if group not in self.dictionary:
            raise KeyError(
                f'{group} not found in interaction matrix {self.name}')
        return self.dictionary[group]

    def keys(self):
        """Get keys from matrix.

        Returns:
            dictionary key list
        """
        return self.dictionary.keys()

    def __str__(self):
        # Values are tuples, which do not support the ``s`` format code,
        # so they are rendered via ``!s``.
        return ''.join(
            '{0:s} {1:s} {2!s}\n'.format(key1, key2, value)
            for key1, row in self.dictionary.items()
            for key2, value in row.items())