import os
import sys
from unicodedata import normalize
from secrets import randbits
from typing import (
    Dict,
    Optional,
    Sequence,
    Set,
    Tuple,
)

from eth2deposit.utils.crypto import (
    SHA256,
    PBKDF2,
)


def _resource_path(relative_path: str) -> str:
    """
    Get the absolute path to a resource in a manner friendly to PyInstaller.
    PyInstaller creates a temp folder and stores path in _MEIPASS which this function swaps
    into a resource path so it is available both when building binaries and running natively.
    """
    try:
        base_path = sys._MEIPASS  # type: ignore
    except AttributeError:
        # Not running from a PyInstaller bundle: resolve relative to the working directory.
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)


def _get_word_list(language: str, path: str) -> Sequence[str]:
    """
    Given the language and path to the wordlist, return the list of BIP39 words.
    The file `<path>/<language>.txt` is read as UTF-8, one word per line, and the
    trailing '\\n' is removed from every line.

    Ref: https://github.com/bitcoin/bips/blob/master/bip-0039/bip-0039-wordlists.md
    """
    path = _resource_path(path)
    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(path, '%s.txt' % language), encoding='utf-8') as word_file:
        return [word.replace('\n', '') for word in word_file]


def _index_to_word(word_list: Sequence[str], index: int) -> str:
    """
    Return the word at position `index` of `word_list`.

    Raises IndexError if `index` is 2048 or larger.
    """
    if index >= 2048:
        raise IndexError(f"`index` should be less than 2048. Got {index}.")
    return word_list[index]


def _word_to_index(word_list: Sequence[str], word: str) -> int:
    """
    Return the position of `word` within `word_list`.

    Raises ValueError if `word` is not in `word_list`.
    """
    try:
        return word_list.index(word)
    except ValueError:
        raise ValueError('Word %s not in BIP39 word-list' % word) from None


def _uint11_array_to_uint(uint11_array: Sequence[int]) -> int:
    """
    Concatenate a sequence of 11-bit values into one integer, the first element
    being the most significant.
    """
    return sum(x << i * 11 for i, x in enumerate(reversed(uint11_array)))


def get_seed(*, mnemonic: str, password: str) -> bytes:
    """
    Derive the seed for the pre-image root of the tree.

    Both the mnemonic and the salt ('mnemonic' + `password`) are NFKD-normalised and
    UTF-8 encoded before being fed to PBKDF2 (2048 rounds, sha512, 64-byte output).

    Ref: https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki#from-mnemonic-to-seed
    """
    encoded_mnemonic = normalize('NFKD', mnemonic).encode('utf-8')
    salt = normalize('NFKD', 'mnemonic' + password).encode('utf-8')
    return PBKDF2(password=encoded_mnemonic, salt=salt, dklen=64, c=2048, prf='sha512')


def get_languages(path: str) -> Tuple[str, ...]:
    """
    Walk the `path` and list all the languages with word-lists available.
    A language is any file directly inside `path` named `<language>.txt`.
    """
    path = _resource_path(path)
    # Only the top level of the directory is inspected.
    (_, _, filenames) = next(os.walk(path))
    return tuple(name[:-4] for name in filenames if name.endswith('.txt'))


def determine_mnemonic_language(mnemonic: str, words_path: str) -> Sequence[str]:
    """
    Given a `mnemonic` determine what language[s] it is written in.
    There are collisions between word-lists, so multiple candidate languages are returned:
    every language whose word-list contains at least one word of the mnemonic.

    Raises ValueError if any word is not present in any word-list.
    """
    languages = get_languages(words_path)
    # Map each word to *all* languages containing it; a plain {word: lang} mapping would
    # keep only one language per shared word and silently drop the other candidates.
    word_language_map: Dict[str, Set[str]] = {}
    for lang in languages:
        for word in _get_word_list(lang, words_path):
            word_language_map.setdefault(word, set()).add(lang)

    candidates: Set[str] = set()
    for word in mnemonic.split(' '):
        try:
            candidates |= word_language_map[word]
        except KeyError:
            raise ValueError('Word not found in mnemonic word lists for any language.') from None
    # Sorted so that the candidate order is deterministic.
    return sorted(candidates)


def _validate_entropy_length(entropy: bytes) -> None:
    """
    Check that `entropy` is 128, 160, 192, 224 or 256 bits long.

    Raises IndexError otherwise.
    """
    entropy_length = len(entropy) * 8
    if entropy_length not in range(128, 257, 32):
        raise IndexError(f"`entropy_length` should be in [128, 160, 192, 224, 256]. Got {entropy_length}.")


def _get_checksum(entropy: bytes) -> int:
    """
    Return the checksum of `entropy` as an integer: the leading (entropy bits / 32) bits
    of SHA256(entropy).

    Raises IndexError if the entropy length is invalid.
    """
    _validate_entropy_length(entropy)
    # len(entropy) is in bytes: bits / 32 == bytes / 4.
    checksum_length = len(entropy) // 4
    return int.from_bytes(SHA256(entropy), 'big') >> (256 - checksum_length)


def verify_mnemonic(mnemonic: str, words_path: str) -> bool:
    """
    Given a mnemonic, verify it against its own checksum.

    Every candidate language is tried; the mnemonic is valid as soon as its checksum
    matches under one of them. Returns False if a word is unknown, if the number of
    words is not one of 12, 15, 18, 21, 24, or if no candidate language yields a
    matching checksum.
    """
    try:
        languages = determine_mnemonic_language(mnemonic, words_path)
    except ValueError:
        return False

    mnemonic_list = mnemonic.split(' ')
    if len(mnemonic_list) not in range(12, 25, 3):
        return False
    # Each word encodes 11 bits; one checksum bit is appended per 3 words.
    checksum_length = len(mnemonic_list) // 3

    for language in languages:
        word_list = _get_word_list(language, words_path)
        try:
            word_indices = [_word_to_index(word_list, word) for word in mnemonic_list]
        except ValueError:
            # This candidate language does not contain every word; try the next one.
            continue
        mnemonic_int = _uint11_array_to_uint(word_indices)
        checksum = mnemonic_int & 2**checksum_length - 1
        entropy = (mnemonic_int - checksum) >> checksum_length
        entropy_bits = entropy.to_bytes(checksum_length * 4, 'big')
        # A mismatch here must not end the search: word-lists share words at
        # different indices, so another candidate language may still validate.
        if _get_checksum(entropy_bits) == checksum:
            return True
    return False


def get_mnemonic(*, language: str, words_path: str, entropy: Optional[bytes]=None) -> str:
    """
    Return a mnemonic string in a given `language` based on `entropy` via the calculated checksum.
    When `entropy` is None, 256 fresh random bits are used.

    Raises IndexError if the length of `entropy` is not one of 128, 160, 192, 224, 256 bits.

    Ref: https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki#generating-the-mnemonic
    """
    if entropy is None:
        entropy = randbits(256).to_bytes(32, 'big')
    entropy_length = len(entropy) * 8
    checksum_length = (entropy_length // 32)
    checksum = _get_checksum(entropy)
    # Append the checksum bits below the entropy bits.
    entropy_bits = int.from_bytes(entropy, 'big') << checksum_length
    entropy_bits += checksum
    entropy_length += checksum_length
    mnemonic = []
    word_list = _get_word_list(language, words_path)
    # Emit one word per 11-bit group, most significant group first.
    for i in range(entropy_length // 11 - 1, -1, -1):
        index = (entropy_bits >> i * 11) & 2**11 - 1
        word = _index_to_word(word_list, index)
        mnemonic.append(word)
    return ' '.join(mnemonic)