rapidhash/rapid_hasher_inline.rs
use core::hash::Hasher;
use crate::rapid_const::{rapidhash_core, rapidhash_finish, rapidhash_seed, RAPID_SEED};

/// A [Hasher] trait compatible hasher that uses the [rapidhash](https://github.com/Nicoshev/rapidhash)
/// algorithm, with `#[inline(always)]` applied to all methods.
///
/// Using `#[inline(always)]` can deliver a large performance improvement when hashing complex
/// objects, but should be benchmarked for your specific use case. If you have HashMaps for many
/// different types, this may come at the cost of some binary size increase.
///
/// See [crate::RapidHasher] for the equivalent hasher without forced inlining.
///
/// See [RapidInlineBuildHasher] for usage with [std::collections::HashMap].
///
/// # Example
/// ```
/// use std::hash::Hasher;
/// use rapidhash::RapidInlineHasher;
///
/// let mut hasher = RapidInlineHasher::default();
/// hasher.write(b"hello world");
/// let hash = hasher.finish();
/// ```
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct RapidInlineHasher {
    seed: u64,
    a: u64,
    b: u64,
    size: u64,
}

/// A [std::hash::BuildHasher] trait compatible hasher that uses the [RapidInlineHasher] algorithm.
///
/// This is an alias for [`std::hash::BuildHasherDefault<RapidInlineHasher>`] with a static seed.
///
/// Note that [crate::RapidRandomState] can be used instead for a [std::hash::BuildHasher] that
/// initialises with a random seed.
///
/// # Example
/// ```
/// use std::collections::HashMap;
/// use std::hash::Hasher;
/// use rapidhash::RapidInlineBuildHasher;
///
/// let mut map = HashMap::with_hasher(RapidInlineBuildHasher::default());
/// map.insert(42, "the answer");
/// ```
pub type RapidInlineBuildHasher = core::hash::BuildHasherDefault<RapidInlineHasher>;

/// Deprecated and renamed to [RapidInlineBuildHasher].
#[deprecated(since = "1.1.0", note = "Renamed to `RapidInlineBuildHasher`")]
#[doc(hidden)]
pub type RapidInlineHashBuilder = core::hash::BuildHasherDefault<RapidInlineHasher>;

/// A [std::collections::HashMap] type that uses the [RapidInlineBuildHasher] hasher.
///
/// # Example
/// ```
/// use rapidhash::RapidInlineHashMap;
/// let mut map = RapidInlineHashMap::default();
/// map.insert(42, "the answer");
///
/// // with capacity
/// let mut map = RapidInlineHashMap::with_capacity_and_hasher(10, Default::default());
/// map.insert(42, "the answer");
/// ```
#[cfg(any(feature = "std", docsrs))]
pub type RapidInlineHashMap<K, V> = std::collections::HashMap<K, V, RapidInlineBuildHasher>;

/// A [std::collections::HashSet] type that uses the [RapidInlineBuildHasher] hasher.
///
/// # Example
/// ```
/// use rapidhash::RapidInlineHashSet;
/// let mut set = RapidInlineHashSet::default();
/// set.insert("the answer");
///
/// // with capacity
/// let mut set = RapidInlineHashSet::with_capacity_and_hasher(10, Default::default());
/// set.insert("the answer");
/// ```
#[cfg(any(feature = "std", docsrs))]
pub type RapidInlineHashSet<K> = std::collections::HashSet<K, RapidInlineBuildHasher>;

impl RapidInlineHasher {
    /// Default `RapidInlineHasher` seed.
    pub const DEFAULT_SEED: u64 = RAPID_SEED;

    /// Create a new [RapidInlineHasher] with a custom seed.
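    ///
    /// # Example
    /// ```
    /// use std::hash::Hasher;
    /// use rapidhash::RapidInlineHasher;
    ///
    /// // a minimal sketch: any u64 can serve as the seed, 42 is an arbitrary choice
    /// let mut hasher = RapidInlineHasher::new(42);
    /// hasher.write(b"hello world");
    /// let hash = hasher.finish();
    /// ```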
    #[inline(always)]
    #[must_use]
    pub const fn new(seed: u64) -> Self {
        Self {
            seed,
            a: 0,
            b: 0,
            size: 0,
        }
    }

    /// Create a new [RapidInlineHasher] using the default seed.
    #[inline(always)]
    #[must_use]
    pub const fn default_const() -> Self {
        Self::new(Self::DEFAULT_SEED)
    }

    /// Const equivalent to [Hasher::write], and marked as `#[inline(always)]`.
    ///
    /// This can deliver a large performance improvement when the `bytes` length is known at
    /// compile time.
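    ///
    /// # Example
    /// ```
    /// use rapidhash::RapidInlineHasher;
    ///
    /// // a minimal sketch: because these methods are `const`, the whole hash
    /// // can be computed at compile time by chaining the const API
    /// const HASH: u64 = RapidInlineHasher::default_const()
    ///     .write_const(b"hello world")
    ///     .finish_const();
    /// ```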
    #[inline(always)]
    #[must_use]
    pub const fn write_const(&self, bytes: &[u8]) -> Self {
        // FUTURE: wyhash processes the bytes in u64::MAX-sized chunks in case bytes.len()
        // exceeds u64::MAX. We use this static assert to ensure that usize is not wider
        // than u64 for now.
        const _: () = assert!(
            usize::MAX as u128 <= u64::MAX as u128,
            "usize is wider than u64. Please raise a github issue to support this."
        );

        let mut this = *self;
        this.size += bytes.len() as u64;
        this.seed = rapidhash_seed(this.seed, this.size);
        let (a, b, seed) = rapidhash_core(this.a, this.b, this.seed, bytes);
        this.a = a;
        this.b = b;
        this.seed = seed;
        this
    }

    /// Const equivalent to [Hasher::finish], and marked as `#[inline(always)]`.
    #[inline(always)]
    #[must_use]
    pub const fn finish_const(&self) -> u64 {
        rapidhash_finish(self.a, self.b, self.size)
    }
}

impl Default for RapidInlineHasher {
    /// Create a new [RapidInlineHasher] with the default seed.
    ///
    /// See [crate::RapidRandomState] for a [std::hash::BuildHasher] that initialises with a random
    /// seed.
    #[inline(always)]
    fn default() -> Self {
        Self::new(RAPID_SEED)
    }
}

/// This implementation provides methods for all integer types, as the compiler will (hopefully...)
/// inline and heavily optimise the rapidhash_core for each. Where the byte length is known at
/// compile time, the compiler can make significant optimisations, and this saves writing each
/// method out by hand.
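///
/// # Example
/// ```
/// use std::hash::Hasher;
/// use rapidhash::RapidInlineHasher;
///
/// // a small sketch: the specialised integer methods are equivalent to
/// // writing the integer's native-endian bytes
/// let mut a = RapidInlineHasher::default();
/// a.write_u64(42);
///
/// let mut b = RapidInlineHasher::default();
/// b.write(&42u64.to_ne_bytes());
///
/// assert_eq!(a.finish(), b.finish());
/// ```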
impl Hasher for RapidInlineHasher {
    #[inline(always)]
    fn finish(&self) -> u64 {
        self.finish_const()
    }

    /// Write a byte slice to the hasher, marked as `#[inline(always)]`.
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) {
        *self = self.write_const(bytes);
    }

    #[inline(always)]
    fn write_u8(&mut self, i: u8) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_u16(&mut self, i: u16) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_u32(&mut self, i: u32) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_u64(&mut self, i: u64) {
        *self = self.write_const(&i.to_ne_bytes());

        // NOTE: in case of compiler regression, it should compile to:
        // self.size += size_of::<u64>() as u64;
        // self.seed ^= rapid_mix(self.seed ^ RAPID_SECRET[0], RAPID_SECRET[1]) ^ self.size;
        // self.a ^= i.rotate_right(32) ^ RAPID_SECRET[1];
        // self.b ^= i ^ self.seed;
        // rapid_mum(&mut self.a, &mut self.b);
    }

    #[inline(always)]
    fn write_u128(&mut self, i: u128) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_usize(&mut self, i: usize) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_i8(&mut self, i: i8) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_i16(&mut self, i: i16) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_i32(&mut self, i: i32) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_i64(&mut self, i: i64) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_i128(&mut self, i: i128) {
        *self = self.write_const(&i.to_ne_bytes());
    }

    #[inline(always)]
    fn write_isize(&mut self, i: isize) {
        *self = self.write_const(&i.to_ne_bytes());
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_hasher_write_u64() {
        // sanity check of the `(len & 24) >> (len >> 3)` shift trick that rapidhash
        // uses for short inputs: for len == 8 the delta evaluates to 4
        assert_eq!((8 & 24) >> (8 >> 3), 4);

        let ints = [
            1234u64,
            0,
            1,
            u64::MAX,
            u64::MAX - 2385962040453523,
        ];

        for int in ints {
            let mut hasher = RapidInlineHasher::default();
            hasher.write(int.to_ne_bytes().as_slice());
            let a = hasher.finish();

            assert_eq!(int.to_ne_bytes().as_slice().len(), 8);

            let mut hasher = RapidInlineHasher::default();
            hasher.write_u64(int);
            let b = hasher.finish();

            assert_eq!(a, b, "Mismatching hash for u64 with input {int}");
        }
    }
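
    // A minimal extra check (a sketch, not part of the original suite): the const API
    // should produce the same hash as the `Hasher` trait path, since `write` delegates
    // to `write_const`.
    #[test]
    fn test_write_const_matches_write() {
        let mut hasher = RapidInlineHasher::default();
        hasher.write(b"hello world");
        let via_trait = hasher.finish();

        let via_const = RapidInlineHasher::default_const()
            .write_const(b"hello world")
            .finish_const();

        assert_eq!(via_trait, via_const, "const and trait APIs should agree");
    }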
}