earthsnake/earthsnake/base32.py

"""Base32 encoding/decoding

For base32 encoding we use rfc4648, no padding, lowercase, prefixed with "b".

Base32 character set: 'abcdefghijklmnopqrstuvwxyz234567'

The Multibase format adds a "b" prefix to specify this particular encoding.
We leave the "b" prefix there because we don't want the encoded string
to start with a number (so we can use it as a URL location).

When decoding, we require it to start with a "b" -- no other multibase formats are allowed.

The decoding must be strict (it doesn't allow a 1 in place of an i, etc).
"""

from base64 import b32decode, b32encode


def base32_bytes_to_string(bytes_: bytes) -> str:
    """Encode bytes as a lowercase, unpadded base32 string prefixed with "b".

    :param bytes_: the raw bytes to encode
    :returns: the multibase-style text: "b" followed by the base32 characters
    """

    # b32encode emits uppercase ASCII with trailing '=' padding; normalise both.
    encoded = b32encode(bytes_).decode('utf-8')
    unpadded = encoded.strip('=').lower()
    return f'b{unpadded}'


# The only characters allowed after the "b" prefix (rfc4648 base32, lowercase).
_BASE32_ALPHABET = frozenset('abcdefghijklmnopqrstuvwxyz234567')


def base32_string_to_bytes(string: str) -> bytes:
    """Decode a "b"-prefixed, unpadded, lowercase base32 string to bytes.

    Decoding is strict: the "b" prefix is mandatory, padding characters are
    rejected, and only the lowercase rfc4648 alphabet is accepted (no uppercase,
    no look-alike substitutions such as 1 for i).

    :param string: the encoded text, including the leading "b"
    :returns: the decoded bytes
    :raises ValueError: if the prefix is missing, the string ends with padding,
        it contains characters outside the base32 alphabet, or its length is one
        that base32 encoding can never produce (the latter is raised by
        ``b32decode`` as ``binascii.Error``, a ``ValueError`` subclass)
    """

    if not string.startswith("b"):
        raise ValueError(f"can't decode base32 string - it should start with a 'b'. {string}")

    payload = string[1:]

    # make sure no padding characters are on the end
    if payload.endswith("="):
        raise ValueError("can't decode base32 string - it contains padding characters ('=')")

    # Enforce strictness ourselves: upper-casing the payload for b32decode would
    # otherwise let uppercase input (and non-ASCII characters whose uppercase
    # form is an ASCII letter) slip through.
    if not _BASE32_ALPHABET.issuperset(payload):
        raise ValueError("can't decode base32 string - it contains invalid characters")

    # b32decode requires padded input whose length is a multiple of 8. The
    # negative modulo yields 0 (not 8) when the payload is already aligned;
    # eight pad characters would be rejected as incorrect padding.
    pad_length = -len(payload) % 8

    return b32decode(payload.upper() + '=' * pad_length)