Source code for propka.hybrid36
"""
Hybrid36 PDB-like file format
=============================
`hybrid36`_ is an alternative PDB format that can encode larger atom
numbers. This module provides the :func:`decode` function to parse the
atom numbers in hybrid36 "PDB" files.
.. _hybrid36: http://cci.lbl.gov/hybrid_36/
"""
import string
# Character classes used to validate hybrid-36 fields.  They are immutable
# because they are shared module-level lookup tables.
_HYBRID36_UPPER_CHARS = frozenset(string.ascii_uppercase)
_HYBRID36_LOWER_CHARS = frozenset(string.ascii_lowercase)
_HYBRID36_DIGITS = frozenset(string.digits)
# A hybrid-36 field is either all digits, or letters of a single case mixed
# with digits; these are the full alphabets for the two lettered variants.
_HYBRID36_UPPER_SET = _HYBRID36_UPPER_CHARS | _HYBRID36_DIGITS
_HYBRID36_LOWER_SET = _HYBRID36_LOWER_CHARS | _HYBRID36_DIGITS


def decode(input_string):
    """Convert an input string of a number in hybrid-36 format to an integer.

    Surrounding whitespace is ignored and a single leading ``-`` negates the
    result.  The first remaining character selects the encoding:

    * an ASCII digit: the field is a plain base-10 number;
    * an uppercase letter: base-36 field continuing right after the largest
      decimal number of the same width (``"A000"`` -> 10000);
    * a lowercase letter: base-36 field continuing right after the largest
      uppercase field of the same width (``"a000"`` -> 1223056).

    Args:
        input_string: input string
    Returns:
        integer
    Raises:
        ValueError: if the string is empty (after removing whitespace and
            sign), starts with an unsupported character, or contains a
            character outside the alphabet selected by its first character
            (e.g. mixed upper/lower case, or a letter in a decimal field).

    Examples:
        >>> decode("  42")
        42
        >>> decode("A0000")
        100000
        >>> decode("-A000")
        -10000
    """
    value_error_message = "invalid literal for hybrid-36 conversion: '{0:s}'"
    original_input_string = input_string
    input_string = input_string.strip()
    # Manually handle negative sign.
    if input_string.startswith("-"):
        sign = -1
        input_string = input_string[1:]
    else:
        sign = 1
    if not input_string:
        # Report what the caller actually passed, like every other error
        # raised below, rather than the stripped/sign-less remainder.
        raise ValueError(value_error_message.format(original_input_string))
    # See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
    num_chars = len(input_string)
    first_char = input_string[0]
    if first_char in _HYBRID36_DIGITS:
        base = 10
        reference = 0
        allowed_chars = _HYBRID36_DIGITS
    elif first_char in _HYBRID36_UPPER_CHARS:
        base = 36
        # Shift so that "A" followed by zeros maps to 10 ** num_chars.
        reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
        allowed_chars = _HYBRID36_UPPER_SET
    elif first_char in _HYBRID36_LOWER_CHARS:
        base = 36
        # int() is case-insensitive, so lowercase fields need an extra
        # 26 * 36 ** (num_chars - 1) on top of the uppercase shift.
        reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
        allowed_chars = _HYBRID36_LOWER_SET
    else:
        raise ValueError(value_error_message.format(original_input_string))
    # Check the validity of the input string before handing it to int():
    # int() alone would accept underscores, non-ASCII digits and mixed-case
    # letters, none of which are valid hybrid-36.
    if not all(char in allowed_chars for char in input_string[1:]):
        raise ValueError(value_error_message.format(original_input_string))
    # Convert with the int function.
    return sign * (int(input_string, base) + reference)