merklehash/src/data_hash.rs (369 lines of code) (raw):
use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
use std::error::Error;
use std::hash::{Hash, Hasher};
use std::io::Write;
use std::mem::transmute_copy;
use std::num::ParseIntError;
use std::ops::{Deref, DerefMut};
use std::{fmt, str};
// URL safe Base 64 encoding with ending characters removed.
use base64::engine::general_purpose::URL_SAFE_NO_PAD;
use base64::Engine as _;
use rand::rngs::SmallRng;
use rand::{RngCore, SeedableRng};
use safe_transmute::{transmute_to_bytes, transmute_to_bytes_mut};
use serde::{Deserialize, Serialize};
/************************************************************************* */
/* */
/* DataHash */
/* */
/************************************************************************* */
/// The DataHash is a transparent 256-bit value stores as `[u64; 4]`.
///
/// [compute_data_hash] and [compute_internal_node_hash] are the two main
/// ways in which a hash on data can be computed.
///
/// Many convenient trait implementations are provided for printing, comparing,
/// and parsing.
///
/// ```ignore
/// let string = "hello world";
/// let hash = compute_data_hash(slice.as_bytes());
/// println!("Hello Hash {}", hash);
/// ```
#[derive(Clone, Copy, Serialize, Deserialize)]
pub struct DataHash([u64; 4]);
impl Deref for DataHash {
type Target = [u64; 4];
#[inline(always)]
fn deref(&self) -> &[u64; 4] {
&self.0
}
}
impl DerefMut for DataHash {
#[inline(always)]
fn deref_mut(&mut self) -> &mut [u64; 4] {
&mut (self.0)
}
}
impl From<[u64; 4]> for DataHash {
fn from(value: [u64; 4]) -> Self {
DataHash(value)
}
}
impl From<[u8; 32]> for DataHash {
fn from(value: [u8; 32]) -> Self {
unsafe { Self(transmute_copy::<[u8; 32], [u64; 4]>(&value)) }
}
}
impl From<&[u8; 32]> for DataHash {
fn from(value: &[u8; 32]) -> Self {
unsafe { Self(transmute_copy::<[u8; 32], [u64; 4]>(value)) }
}
}
impl From<DataHash> for [u8; 32] {
fn from(val: DataHash) -> Self {
unsafe { std::mem::transmute(val) }
}
}
impl AsRef<[u8]> for DataHash {
fn as_ref(&self) -> &[u8] {
transmute_to_bytes(self.deref())
}
}
impl AsRef<DataHash> for DataHash {
fn as_ref(&self) -> &DataHash {
self
}
}
impl Default for DataHash {
/// The default constructor returns a DataHash of 0s
fn default() -> DataHash {
DataHash([0; 4])
}
}
impl PartialEq for DataHash {
fn eq(&self, other: &Self) -> bool {
self[0] == other[0] && self[1] == other[1] && self[2] == other[2] && self[3] == other[3]
}
}
impl Eq for DataHash {}
impl Ord for DataHash {
fn cmp(&self, other: &Self) -> Ordering {
self.0.cmp(&other.0)
}
}
impl PartialOrd for DataHash {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl core::ops::Rem<u64> for DataHash {
type Output = u64;
fn rem(self, rhs: u64) -> Self::Output {
self[3] % rhs
}
}
#[cfg(not(target_family = "wasm"))]
unsafe impl heed::bytemuck::Zeroable for DataHash {
fn zeroed() -> Self {
DataHash([0; 4])
}
}
#[cfg(not(target_family = "wasm"))]
unsafe impl heed::bytemuck::Pod for DataHash {}
/// The error type that is returned if [DataHash::from_hex] fails.
#[derive(Debug, Clone)]
pub struct DataHashHexParseError;
impl Error for DataHashHexParseError {}
impl fmt::Display for DataHashHexParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Invalid hex input for DataHash")
}
}
impl From<ParseIntError> for DataHashHexParseError {
fn from(_err: ParseIntError) -> Self {
DataHashHexParseError {}
}
}
impl DataHash {
/// Returns the hexadecimal printout of the hash.
pub fn hex(&self) -> String {
format!("{:016x}{:016x}{:016x}{:016x}", self.0[0], self.0[1], self.0[2], self.0[3])
}
/// Gives a compact base64 representation of the hash that is more compact than hex and is
/// also url and file name safe.
pub fn base64(&self) -> String {
URL_SAFE_NO_PAD.encode(self.as_bytes())
}
/// Parses a hexadecimal string as a DataHash, returning
/// Err(DataHashHexParseError) on failure.
pub fn from_hex(h: &str) -> Result<DataHash, DataHashHexParseError> {
if h.len() != 64 {
return Err(DataHashHexParseError {});
}
let mut ret: DataHash = Default::default();
let good = h.as_bytes().iter().all(|c| c.is_ascii_hexdigit());
if !good {
return Err(DataHashHexParseError {});
}
ret.0[0] = u64::from_str_radix(&h[..16], 16)?;
ret.0[1] = u64::from_str_radix(&h[16..32], 16)?;
ret.0[2] = u64::from_str_radix(&h[32..48], 16)?;
ret.0[3] = u64::from_str_radix(&h[48..64], 16)?;
Ok(ret)
}
// Converts the hash from base64 (created by base64 above) to this. Used for testing.
pub fn from_base64(b64: &str) -> Result<DataHash, DataHashBytesParseError> {
let bytes = URL_SAFE_NO_PAD.decode(b64.as_bytes()).map_err(|_| DataHashBytesParseError {})?;
DataHash::from_slice(&bytes)
}
/// Returns the datahash as a raw byte slice.
pub fn as_bytes(&self) -> &[u8] {
transmute_to_bytes(&self.0[..])
}
pub fn from_slice(value: &[u8]) -> Result<Self, DataHashBytesParseError> {
if value.len() != 32 {
return Err(DataHashBytesParseError);
}
let mut hash: DataHash = DataHash::default();
unsafe {
let src = value.as_ptr();
let dst = hash.0.as_mut_ptr() as *mut u8;
std::ptr::copy_nonoverlapping(src, dst, 32);
}
Ok(hash)
}
pub fn random_from_seed(seed: u64) -> Self {
let mut s = Self::default();
let mut rng = SmallRng::seed_from_u64(seed);
rng.fill_bytes(transmute_to_bytes_mut(&mut s.0[..]));
s
}
}
// Just use the same type here; rename for code legibility.
pub type HMACKey = DataHash;
impl DataHash {
/** Given a 256 bit key and a MerkleHash (the message), generate an HMAC hash from self.
*/
pub fn hmac(&self, key: HMACKey) -> Self {
// Use the blake 3 keyed hash method as the HMac
Self::from(*blake3::keyed_hash(&key.into(), self.as_bytes()).as_bytes())
}
/// A marker struct of all 1 bits to use, as all zeros refers to the segment of zero bytes.
pub fn marker() -> Self {
DataHash([!0u64; 4])
}
}
/// The error type that is returned if TryFrom<&[u8]> fails.
#[derive(Debug, Clone)]
pub struct DataHashBytesParseError;
impl Error for DataHashBytesParseError {}
impl fmt::Display for DataHashBytesParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Invalid bytes input for DataHash")
}
}
impl TryFrom<&[u8]> for DataHash {
type Error = DataHashBytesParseError;
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
Self::from_slice(value)
}
}
impl From<DataHash> for Vec<u8> {
fn from(val: DataHash) -> Self {
val.as_bytes().into()
}
}
impl From<&DataHash> for Vec<u8> {
fn from(val: &DataHash) -> Self {
val.as_bytes().into()
}
}
// this is already a nice hash function. We just give the last 64-bits
// for use in hashtables etc.
impl Hash for DataHash {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_u64(self.0[0]);
}
}
// as generated from random.org
/// The hash key used for [compute_data_hash]
const DATA_KEY: [u8; 32] = [
102, 151, 245, 119, 91, 149, 80, 222, 49, 53, 203, 172, 165, 151, 24, 28, 157, 228, 33, 16, 155, 235, 43, 88, 180,
208, 176, 75, 147, 173, 242, 41,
];
/// The hash key used for [compute_internal_node_hash]
const INTERNAL_NODE_HASH: [u8; 32] = [
1, 126, 197, 199, 165, 71, 41, 150, 253, 148, 102, 102, 180, 138, 2, 230, 93, 221, 83, 111, 55, 199, 109, 210, 248,
99, 82, 230, 74, 83, 113, 63,
];
/// Hash function used to compute a leaf hash of the MerkleTree
/// from any user-provided sequence of bytes. You should be using
/// [compute_internal_node_hash] if this hash is to be used for interior
/// nodes.
///
/// Example:
/// ```ignore
/// let string = "hello world";
/// let hash = compute_data_hash(slice.as_bytes());
/// println!("Hello Hash {}", hash);
/// ```
pub fn compute_data_hash(slice: &[u8]) -> DataHash {
let digest = blake3::keyed_hash(&DATA_KEY, slice);
DataHash::from(digest.as_bytes())
}
/// Hash function used to compute the hash of an interior node.
///
/// Note that this method also accepts a slice
/// of `&[u8]` and it is up to the caller to format the string appropriately.
/// For instance: the string could be simply the child hashes printed out
/// consecutively.
///
/// The reason why this method does not simply take an array of Hashes, and
/// instead require the caller to format the input as a string is to allow the
/// user to add additional information to the string being hashed (beyond just
/// the hashes itself). i.e. the string being hashed could be a concatenation
/// of "hashes of children + children metadata".
///
/// Example:
/// ```ignore
/// let mut buf = String::with_capacity(1024);
/// for node in nodes.iter() {
/// writeln!(buf, "{:x} : {}", node.hash(), node.len()).unwrap();
/// }
/// compute_internal_node_hash(buf.as_bytes())
/// ```
pub fn compute_internal_node_hash(slice: &[u8]) -> DataHash {
let digest = blake3::keyed_hash(&INTERNAL_NODE_HASH, slice);
DataHash::from(digest.as_bytes())
}
impl fmt::LowerHex for DataHash {
/// Allow the DataHash to be printed with
/// `println!("{:x}", hash)`
/// This prints the hexadecimal representation of the Hash.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
write!(f, "{}", self.hex())
}
}
impl fmt::Display for DataHash {
/// Allow the DataHash to be printed with
/// `println!("{}", hash)`
/// This prints the hexadecimal representation of the Hash.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
write!(f, "{}", self.hex())
}
}
impl fmt::Debug for DataHash {
/// Allow the DataHash to be printed with
/// `println!("{:?}", hash)`
/// This prints the hexadecimal representation of the Hash.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
write!(f, "{}", self.hex())
}
}
/// Wrapper around a Write trait that allows computation of the hash at the end.
///
/// It is recommended to wrap this in a BufWriter object: i.e.
///
/// let out_file = std::fs::OpenOptions::new().create(true).write(true).open("temp.fs")?;
///
/// let hashed_write = HashedWrite::new(out_file);
///
/// {
/// let buf_writer = BufWriter::new(&mut hashed_write);
///
/// // Do the writing against buf_writer
///
///
/// }
///
/// let h = hashed_write.hash();
///
pub struct HashedWrite<W: Write> {
hasher: blake3::Hasher,
writer: W,
}
impl<W: Write> HashedWrite<W> {
pub fn new(writer: W) -> Self {
Self {
hasher: blake3::Hasher::new_keyed(&DATA_KEY),
writer,
}
}
pub fn hash(&self) -> DataHash {
let digest = self.hasher.finalize();
DataHash::from(digest.as_bytes())
}
pub fn into_inner(self) -> W {
self.writer
}
}
impl<W: Write> Write for HashedWrite<W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.hasher.update(buf);
self.writer.write(buf)
}
fn flush(&mut self) -> std::io::Result<()> {
self.writer.flush()
}
}
pub mod hex {
pub mod serde {
use std::fmt;
use serde::de::{self, Visitor};
use serde::{Deserializer, Serializer};
use crate::DataHash;
pub fn serialize<S>(value: &DataHash, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let hex = value.hex();
serializer.serialize_str(&hex)
}
pub fn deserialize<'de, D>(deserializer: D) -> Result<DataHash, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_str(HexVisitor)
}
// Visitor for deserialization
struct HexVisitor;
impl Visitor<'_> for HexVisitor {
type Value = DataHash;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a merklehash")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
DataHash::from_hex(v).map_err(|e| serde::de::Error::custom(e))
}
}
}
}
#[cfg(test)]
mod tests {
use std::io::Write;
use rand::prelude::*;
use crate::{compute_data_hash, DataHash, HashedWrite};
#[test]
fn test_try_from_bytes() {
let hash_bytes_proper = [1u8; 32].to_vec();
assert!(DataHash::try_from(hash_bytes_proper.as_slice()).is_ok());
let hash_bytes_improper = [1u8; 31];
assert!(DataHash::try_from(hash_bytes_improper.as_slice()).is_err());
}
#[test]
fn test_hashed_write() -> std::io::Result<()> {
let mut written_data = Vec::<u8>::with_capacity(300);
let mut raw_data = vec![0u8; 300];
let mut rng = StdRng::seed_from_u64(0);
rng.fill_bytes(&mut raw_data[..]);
let mut hashed_write = HashedWrite::new(&mut written_data);
for i in 0..30 {
hashed_write.write_all(&raw_data[(10 * i)..(10 * (i + 1))])?;
}
assert_eq!(hashed_write.hash(), compute_data_hash(&raw_data[..]));
assert_eq!(written_data, raw_data);
Ok(())
}
#[test]
fn test_hmac_hash_key_change() {
// Prepare a fixed message
let message_bytes = [0u8; 32];
let message = DataHash::from(message_bytes);
// Prepare two different keys
let key1 = [0u8; 32];
let key2 = [1u8; 32];
// Compute HMAC hashes with different keys
let output1 = message.hmac(key1.into());
let output2 = message.hmac(key2.into());
// Verify that the outputs are different
assert_ne!(output1, output2,);
}
#[test]
fn test_hmac_hash_message_change() {
// Prepare a fixed key
let key = [0u8; 32];
// Prepare two different messages
let message1_bytes = [0u8; 32];
let message2_bytes = [1u8; 32];
let message1 = DataHash::from(message1_bytes);
let message2 = DataHash::from(message2_bytes);
// Compute HMAC hashes with different messages
let output1 = message1.hmac(key.into());
let output2 = message2.hmac(key.into());
// Verify that the outputs are different
assert_ne!(output1, output2,);
}
// Test the base64 usage.
#[test]
fn test_base64() {
let hash = compute_data_hash(&[0, 1, 2, 3]);
let b64 = hash.base64();
assert_eq!(hash, DataHash::from_base64(&b64).unwrap());
}
}