From 68629acebea8a4ba683c7d2670ef7fac5f5cfba6 Mon Sep 17 00:00:00 2001 From: mjkeating Date: Sat, 8 Dec 2018 15:25:59 -0800 Subject: [PATCH] Added tree_hash impl for Vec and Hashtable (list and container); plus various cleanup code --- beacon_chain/types/src/shard_and_committee.rs | 4 - beacon_chain/types/src/validator_record.rs | 36 ++++---- beacon_chain/utils/ssz/src/impl_encode.rs | 4 +- beacon_chain/utils/ssz/src/impl_tree_hash.rs | 92 ++++++++++++++++--- beacon_chain/utils/ssz/src/tree_hash.rs | 51 +--------- 5 files changed, 99 insertions(+), 88 deletions(-) diff --git a/beacon_chain/types/src/shard_and_committee.rs b/beacon_chain/types/src/shard_and_committee.rs index 8388b9cbc..9982611e8 100644 --- a/beacon_chain/types/src/shard_and_committee.rs +++ b/beacon_chain/types/src/shard_and_committee.rs @@ -18,10 +18,6 @@ impl ShardAndCommittee { } impl TreeHash for ShardAndCommittee { - // python sample code: - // def hash_shard_and_committee(val): - // committee = merkle_hash([x.to_bytes(3, 'big') for x in val.committee]) - // return hash(val.shard_id.to_bytes(2, 'big') + committee) fn tree_hash(&self) -> Vec { let mut committee_ssz_items = Vec::new(); for c in &self.committee { diff --git a/beacon_chain/types/src/validator_record.rs b/beacon_chain/types/src/validator_record.rs index 3d4a57e20..1b18a0643 100644 --- a/beacon_chain/types/src/validator_record.rs +++ b/beacon_chain/types/src/validator_record.rs @@ -2,6 +2,16 @@ use super::bls::{Keypair, PublicKey}; use super::ssz::TreeHash; use super::{Address, Hash256}; +pub const HASH_SSZ_VALIDATOR_RECORD_LENGTH: usize = { + 32 + // pubkey.to_bytes(32, 'big') + 2 + // withdrawal_shard.to_bytes(2, 'big') + 20 + // withdrawal_address + 32 + // randao_commitment + 16 + // balance.to_bytes(16, 'big') + 16 + // start_dynasty.to_bytes(8, 'big') + 8 // end_dynasty.to_bytes(8, 'big') +}; + #[derive(Debug, PartialEq, Clone, Copy)] pub enum ValidatorStatus { PendingActivation = 0, @@ -46,22 +56,15 @@ impl ValidatorRecord { 
} impl TreeHash for ValidatorRecord { - /* python sample code: - def hash_validator_record(val): - return hash(val.pubkey.to_bytes(32, 'big') + val.withdrawal_shard.to_bytes(2, 'big') + \ - val.withdrawal_address + val.randao_commitment + val.balance.to_bytes(16, 'big') + \ - val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big')) - */ fn tree_hash(&self) -> Vec { - // the serialized fields, to be hashed, should add up to 118 bytes in length. - // allocating it once here - let mut ssz = Vec::with_capacity(118); + let mut ssz = Vec::with_capacity(HASH_SSZ_VALIDATOR_RECORD_LENGTH); - // "val.pubkey.to_bytes(32, 'big')" logic + // From python sample: "val.pubkey.to_bytes(32, 'big')" // TODO: - // probably all kinds of wrong here. Not sure how to convert (szz) - // pubkey into a big-endian 32 byte array. Note: as_bytes(), the only method on - // PublicKey, returns a 192 byte array. + // Need to actually convert (ssz) pubkey into a big-endian 32 byte + // array. + // Also, our ValidatorRecord seems to be missing the start_dynasty + // and end_dynasty fields let pub_key_bytes = &mut self.pubkey.as_bytes(); pub_key_bytes.resize(32, 0); ssz.append(pub_key_bytes); @@ -70,17 +73,10 @@ impl TreeHash for ValidatorRecord { ssz.append(&mut self.withdrawal_address.tree_hash()); ssz.append(&mut self.randao_commitment.tree_hash()); - // balance is a 64bit number that serializes to 8 bytes. - // Right padding here to resize to 16 bytes - not sure why - // a 16 byte array is implemented in the python code: "val.balance.to_bytes(16, 'big')" let mut balance = self.balance.tree_hash(); balance.resize(16, 0); ssz.append(&mut balance); - // TODO: - // ... 
+ val.start_dynasty.to_bytes(8, 'big') + val.end_dynasty.to_bytes(8, 'big') - // Our ValidatorRecord seems to be missing the start_dynasty and end_dynasty fields - ssz.tree_hash() } } diff --git a/beacon_chain/utils/ssz/src/impl_encode.rs b/beacon_chain/utils/ssz/src/impl_encode.rs index c9ca8b006..8a05b9705 100644 --- a/beacon_chain/utils/ssz/src/impl_encode.rs +++ b/beacon_chain/utils/ssz/src/impl_encode.rs @@ -48,14 +48,12 @@ impl_encodable_for_uint!(usize, 64); impl Encodable for H256 { fn ssz_append(&self, s: &mut SszStream) { - assert_eq!(32, self.len()); s.append_encoded_raw(&self.to_vec()); } } impl Encodable for Address { fn ssz_append(&self, s: &mut SszStream) { - assert_eq!(20, self.len()); s.append_encoded_raw(&self) } } @@ -73,7 +71,7 @@ mod tests { } #[test] - fn test_ssz_encode_adress() { + fn test_ssz_encode_address() { let h = Address::zero(); let mut ssz = SszStream::new(); ssz.append(&h); diff --git a/beacon_chain/utils/ssz/src/impl_tree_hash.rs b/beacon_chain/utils/ssz/src/impl_tree_hash.rs index c3fccb2bc..8c5a5d17a 100644 --- a/beacon_chain/utils/ssz/src/impl_tree_hash.rs +++ b/beacon_chain/utils/ssz/src/impl_tree_hash.rs @@ -1,13 +1,11 @@ extern crate blake2_rfc; use self::blake2_rfc::blake2b::blake2b; - use super::ethereum_types::{Address, H256}; use super::{ssz_encode, TreeHash}; - -// I haven't added tests for tree_hash implementations that simply pass -// thru to the szz_encode lib for which tests already exist. Do we want -// test anyway? 
+use std::cmp::Ord; +use std::collections::HashMap; +use std::hash::Hash; impl TreeHash for u8 { fn tree_hash(&self) -> Vec { @@ -45,23 +43,87 @@ impl TreeHash for H256 { } } -// hash byte arrays impl TreeHash for [u8] { fn tree_hash(&self) -> Vec { hash(&self) } } -/** - * From the Spec: - * We define hash(x) as BLAKE2b-512(x)[0:32] - * From the python sample code: - * return blake2b(x).digest()[:32] - * - * This was orginally writting for blake2s before it was changed to blake2b - * Perhaps, we should be using 'canonical_hash' in the hashing lib? - */ +impl TreeHash for Vec +where + T: TreeHash, +{ + /// Appends the tree_hash for each value of 'self' into a byte array + /// and returns the hash of said byte array + fn tree_hash(&self) -> Vec { + let mut result = Vec::new(); + for x in self { + result.append(&mut x.tree_hash()); + } + + hash(&result) + } +} + +impl TreeHash for HashMap +where + K: Eq, + K: Hash, + K: Ord, + V: TreeHash, +{ + /// Appends the tree_hash for each value of 'self, sorted by key, + /// into a byte array and returns the hash of said byte array + fn tree_hash(&self) -> Vec { + let mut items: Vec<_> = self.iter().collect(); + items.sort_by(|a, b| a.0.cmp(b.0)); + let mut result = Vec::new(); + for item in items { + result.append(&mut item.1.tree_hash()); + } + + hash(&result) + } +} + +/// From the Spec: +/// We define hash(x) as BLAKE2b-512(x)[0:32] fn hash(data: &[u8]) -> Vec { let result = blake2b(32, &[], &data); result.as_bytes().to_vec() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_impl_tree_hash_vec() { + let result = vec![1u32, 2, 3, 4, 5, 6, 7].tree_hash(); + assert_eq!(result.len(), 32); + } + + #[test] + fn test_impl_tree_hash_hashmap() { + let mut map = HashMap::new(); + map.insert("c", 3); + map.insert("b", 2); + map.insert("g", 7); + map.insert("d", 6); + map.insert("e", 4); + map.insert("a", 1u32); + map.insert("f", 5); + let result = map.tree_hash(); + + // TODO: resolve inconsistencies between the 
python sample code and + // the spec; and create tests that tie-out to an official result + assert_eq!( + result, + [ + 59, 110, 242, 24, 177, 184, 73, 109, 190, 19, 172, 39, 74, 94, 224, 198, 0, 170, + 225, 152, 249, 59, 10, 76, 137, 124, 52, 159, 37, 42, 26, 157 + ] + ); + } + +} diff --git a/beacon_chain/utils/ssz/src/tree_hash.rs b/beacon_chain/utils/ssz/src/tree_hash.rs index 63b574049..60a052b35 100644 --- a/beacon_chain/utils/ssz/src/tree_hash.rs +++ b/beacon_chain/utils/ssz/src/tree_hash.rs @@ -2,71 +2,32 @@ const CHUNKSIZE: usize = 128; const HASHSIZE: usize = 32; pub trait TreeHash { - // Note: it would be nice to have a default trait implementation here - // i.e. szz_encode(self) - but rust complains it does not know - // the size of 'self'. Not sure if there's a way around this. - fn tree_hash(&self) -> Vec; } -// python example: Note - I'm seeing some inconsistencies -// between this and the 'Tree Hash' section in the SSZ spec. -// So, I imagine it will change. -/* def merkle_hash(lst): - # Concatenate list into data - if len(lst[0]) != next_power_of_2(len(lst[0])): - lst = [extend_to_power_of_2(x) for x in lst] - data = b''.join(lst) - # Add padding - data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)) - assert len(data) % CHUNKSIZE == 0 - # Store length (to compensate for non-bijectiveness of padding) - datalen = len(lst).to_bytes(32, 'big') - # Convert to chunks - chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)] - chunkz = [None] * next_power_of_2(len(chunkz)) + chunkz + [b'\x00' * CHUNKSIZE] - for i in range(len(chunkz)//2 - 1, 0, -1): - chunkz[i] = hash(chunkz[i*2] + chunkz[i*2+1]) - return hash(chunkz[1] + datalen) */ - -/** - * Returns a 32 byte hash of 'list', a vector of byte vectors. - * Note that this will consume 'list'. - * */ +/// Returns a 32 byte hash of 'list' - a vector of byte vectors. +/// Note that this will consume 'list'. 
pub fn merkle_hash(list: &mut Vec>) -> Vec { // flatten list let data = &mut list_to_blob(list); - // data should be divisible by CHUNKSIZE - assert_eq!(data.len() % CHUNKSIZE, 0); - // get data_len as bytes. It will hashed will the merkle root let dlen = data.len() as u64; let data_len_bytes = &mut dlen.tree_hash(); data_len_bytes.resize(32, 0); // merklize - // - // From the Spec: - // while len(chunkz) > 1: - // if len(chunkz) % 2 == 1: - // chunkz.append(b'\x00' * SSZ_CHUNK_SIZE) - // chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] let mut mhash = hash_level(data, CHUNKSIZE); while mhash.len() > HASHSIZE { mhash = hash_level(&mut mhash, HASHSIZE); } - assert_eq!(mhash.len(), HASHSIZE); - mhash.append(data_len_bytes); mhash.tree_hash() } -/** - * Takes a flat vector of bytes. It then hashes (chunk_size * 2) into - * a byte vector of hashes, divisible by the 32 byte hashsize - */ +/// Takes a flat vector of bytes. It then hashes 'chunk_size * 2' slices into +/// a byte vector of hashes, divisible by HASHSIZE fn hash_level(data: &mut Vec, chunk_size: usize) -> Vec { assert!(data.len() % chunk_size == 0); @@ -119,9 +80,7 @@ fn list_to_blob(list: &mut Vec>) -> Vec { data } -/** - * Extends data length to a power of 2 by minimally right-zero-padding - */ +/// Extends data length to a power of 2 by minimally right-zero-padding fn extend_to_power_of_2(data: &mut Vec) { let len = data.len(); let new_len = len.next_power_of_two();