rapidhash/
rapid_hasher_inline.rs

1use core::hash::Hasher;
2use crate::rapid_const::{rapidhash_core, rapidhash_finish, rapidhash_seed, RAPID_SEED};
3
4/// A [Hasher] trait compatible hasher that uses the [rapidhash](https://github.com/Nicoshev/rapidhash)
5/// algorithm, and uses `#[inline(always)]` for all methods.
6///
7/// Using `#[inline(always)]` can deliver a large performance improvement when hashing complex
8/// objects, but should be benchmarked for your specific use case. If you have HashMaps for many
9/// different types this may come at the cost of some binary size increase.
10///
11/// See [crate::RapidHasher] for default non-forced inline methods.
12///
13/// See [RapidInlineHashBuilder] for usage with [std::collections::HashMap].
14///
15/// # Example
16/// ```
17/// use std::hash::Hasher;
18/// use rapidhash::RapidInlineHasher;
19///
20/// let mut hasher = RapidInlineHasher::default();
21/// hasher.write(b"hello world");
22/// let hash = hasher.finish();
23/// ```
24#[derive(Copy, Clone, Eq, PartialEq)]
25pub struct RapidInlineHasher {
26    seed: u64,
27    a: u64,
28    b: u64,
29    size: u64,
30}
31
32/// A [std::hash::BuildHasher] trait compatible hasher that uses the [RapidInlineHasher] algorithm.
33///
34/// This is an alias for [`std::hash::BuildHasherDefault<RapidHasher>`] with a static seed.
35///
36/// Note there that [crate::RapidRandomState] with can be used instead for a
37/// [std::hash::BuildHasher] that initialises with a random seed.
38///
39/// # Example
40/// ```
41/// use std::collections::HashMap;
42/// use std::hash::Hasher;
43/// use rapidhash::RapidInlineBuildHasher;
44///
45/// let mut map = HashMap::with_hasher(RapidInlineBuildHasher::default());
46/// map.insert(42, "the answer");
47/// ```
48pub type RapidInlineBuildHasher = core::hash::BuildHasherDefault<RapidInlineHasher>;
49
50/// Deprecated and renamed to [RapidInlineBuildHasher].
51#[deprecated(since = "1.1.0", note = "Renamed to `RapidInlineBuildHasher`")]
52#[doc(hidden)]
53pub type RapidInlineHashBuilder = core::hash::BuildHasherDefault<RapidInlineHasher>;
54
55/// A [std::collections::HashMap] type that uses the [RapidInlineBuildHasher] hasher.
56///
57/// # Example
58/// ```
59/// use rapidhash::RapidInlineHashMap;
60/// let mut map = RapidInlineHashMap::default();
61/// map.insert(42, "the answer");
62///
63/// // with capacity
64/// let mut map = RapidInlineHashMap::with_capacity_and_hasher(10, Default::default());
65/// map.insert(42, "the answer");
66/// ```
67#[cfg(any(feature = "std", docsrs))]
68pub type RapidInlineHashMap<K, V> = std::collections::HashMap<K, V, RapidInlineBuildHasher>;
69
70/// A [std::collections::HashSet] type that uses the [RapidInlineBuildHasher] hasher.
71///
72/// # Example
73/// ```
74/// use rapidhash::RapidInlineHashSet;
75/// let mut set = RapidInlineHashSet::default();
76/// set.insert("the answer");
77///
78/// // with capacity
79/// let mut set = RapidInlineHashSet::with_capacity_and_hasher(10, Default::default());
80/// set.insert("the answer");
81/// ```
82#[cfg(any(feature = "std", docsrs))]
83pub type RapidInlineHashSet<K> = std::collections::HashSet<K, RapidInlineBuildHasher>;
84
85impl RapidInlineHasher {
86    /// Default `RapidHasher` seed.
87    pub const DEFAULT_SEED: u64 = RAPID_SEED;
88
89    /// Create a new [RapidInlineHasher] with a custom seed.
90    #[inline(always)]
91    #[must_use]
92    pub const fn new(seed: u64) -> Self {
93        Self {
94            seed,
95            a: 0,
96            b: 0,
97            size: 0,
98        }
99    }
100
101    /// Create a new [RapidInlineHasher] using the default seed.
102    #[inline(always)]
103    #[must_use]
104    pub const fn default_const() -> Self {
105        Self::new(Self::DEFAULT_SEED)
106    }
107
108    /// Const equivalent to [Hasher::write], and marked as `#[inline(always)]`.
109    ///
110    /// This can deliver a large performance improvement when the `bytes` length is known at compile
111    /// time.
112    #[inline(always)]
113    #[must_use]
114    pub const fn write_const(&self, bytes: &[u8]) -> Self {
115        // FUTURE: wyhash processes the bytes as u64::MAX chunks in case chunk.len() > usize.
116        // we use this static assert to ensure that usize is not larger than u64 for now.
117        const _: () = assert!(
118            usize::MAX as u128 <= u64::MAX as u128,
119            "usize is wider than u64. Please raise a github issue to support this."
120        );
121
122        let mut this = *self;
123        this.size += bytes.len() as u64;
124        this.seed = rapidhash_seed(this.seed, this.size);
125        let (a, b, seed) = rapidhash_core(this.a, this.b, this.seed, bytes);
126        this.a = a;
127        this.b = b;
128        this.seed = seed;
129        this
130    }
131
132    /// Const equivalent to [Hasher::finish], and marked as `#[inline(always)]`.
133    #[inline(always)]
134    #[must_use]
135    pub const fn finish_const(&self) -> u64 {
136        rapidhash_finish(self.a, self.b, self.size)
137    }
138}
139
140impl Default for RapidInlineHasher {
141    /// Create a new [RapidInlineHasher] with the default seed.
142    ///
143    /// See [crate::RapidRandomState] for a [std::hash::BuildHasher] that initialises with a random
144    /// seed.
145    #[inline(always)]
146    fn default() -> Self {
147        Self::new(RAPID_SEED)
148    }
149}
150
151/// This implementation implements methods for all integer types as the compiler will (hopefully...)
152/// inline and heavily optimize the rapidhash_core for each. Where the bytes length is known the
153/// compiler can make significant optimisations and saves us writing them out by hand.
154impl Hasher for RapidInlineHasher {
155    #[inline(always)]
156    fn finish(&self) -> u64 {
157        self.finish_const()
158    }
159
160    /// Write a byte slice to the hasher, marked as `#[inline(always)]`.
161    #[inline(always)]
162    fn write(&mut self, bytes: &[u8]) {
163        *self = self.write_const(bytes);
164    }
165
166    #[inline(always)]
167    fn write_u8(&mut self, i: u8) {
168        *self = self.write_const(&i.to_ne_bytes());
169    }
170
171    #[inline(always)]
172    fn write_u16(&mut self, i: u16) {
173        *self = self.write_const(&i.to_ne_bytes());
174    }
175
176    #[inline(always)]
177    fn write_u32(&mut self, i: u32) {
178        *self = self.write_const(&i.to_ne_bytes());
179    }
180
181    #[inline(always)]
182    fn write_u64(&mut self, i: u64) {
183        *self = self.write_const(&i.to_ne_bytes());
184
185        // NOTE: in case of compiler regression, it should compile to:
186        // self.size += size_of::<u64>() as u64;
187        // self.seed ^= rapid_mix(self.seed ^ RAPID_SECRET[0], RAPID_SECRET[1]) ^ self.size;
188        // self.a ^= i.rotate_right(32) ^ RAPID_SECRET[1];
189        // self.b ^= i ^ self.seed;
190        // rapid_mum(&mut self.a, &mut self.b);
191    }
192
193    #[inline(always)]
194    fn write_u128(&mut self, i: u128) {
195        *self = self.write_const(&i.to_ne_bytes());
196    }
197
198    #[inline(always)]
199    fn write_usize(&mut self, i: usize) {
200        *self = self.write_const(&i.to_ne_bytes());
201    }
202
203    #[inline(always)]
204    fn write_i8(&mut self, i: i8) {
205        *self = self.write_const(&i.to_ne_bytes());
206    }
207
208    #[inline(always)]
209    fn write_i16(&mut self, i: i16) {
210        *self = self.write_const(&i.to_ne_bytes());
211    }
212
213    #[inline(always)]
214    fn write_i32(&mut self, i: i32) {
215        *self = self.write_const(&i.to_ne_bytes());
216    }
217
218    #[inline(always)]
219    fn write_i64(&mut self, i: i64) {
220        *self = self.write_const(&i.to_ne_bytes());
221    }
222
223    #[inline(always)]
224    fn write_i128(&mut self, i: i128) {
225        *self = self.write_const(&i.to_ne_bytes());
226    }
227
228    #[inline(always)]
229    fn write_isize(&mut self, i: isize) {
230        *self = self.write_const(&i.to_ne_bytes());
231    }
232}
233
234#[cfg(test)]
235mod tests {
236    use super::*;
237
238    #[test]
239    fn test_hasher_write_u64() {
240        assert_eq!((8 & 24) >> (8 >> 3), 4);
241
242        let ints = [
243            1234u64,
244            0,
245            1,
246            u64::MAX,
247            u64::MAX - 2385962040453523
248        ];
249
250        for int in ints {
251            let mut hasher = RapidInlineHasher::default();
252            hasher.write(int.to_ne_bytes().as_slice());
253            let a = hasher.finish();
254
255            assert_eq!(int.to_ne_bytes().as_slice().len(), 8);
256
257            let mut hasher = RapidInlineHasher::default();
258            hasher.write_u64(int);
259            let b = hasher.finish();
260
261            assert_eq!(a, b, "Mismatching hash for u64 with input {int}");
262        }
263    }
264}