deflate/
chained_hash_table.rs1use std::{mem, ptr};
3
4pub const WINDOW_SIZE: usize = 32768;
5pub const WINDOW_MASK: usize = WINDOW_SIZE - 1;
6#[cfg(test)]
7pub const HASH_BYTES: usize = 3;
8const HASH_SHIFT: u16 = 5;
9const HASH_MASK: u16 = WINDOW_MASK as u16;
10
11struct Tables {
13 pub head: [u16; WINDOW_SIZE],
15 pub prev: [u16; WINDOW_SIZE],
17}
18
19impl Default for Tables {
20 #[inline]
21 fn default() -> Tables {
22 unsafe {
27 Tables {
28 head: mem::uninitialized(),
29 prev: mem::uninitialized(),
30 }
31 }
32 }
33}
34
35impl Tables {
36 fn fill_prev(&mut self) {
37 assert_eq!(self.head.len(), self.prev.len());
38 unsafe {
43 ptr::copy_nonoverlapping(self.head.as_ptr(), self.prev.as_mut_ptr(), self.prev.len())
44 }
45 }
46}
47
48fn create_tables() -> Box<Tables> {
50 let mut t: Box<Tables> = Box::default();
58
59 for (n, b) in t.head.iter_mut().enumerate() {
60 unsafe {
65 ptr::write(b, n as u16);
66 }
67 }
68
69 t.fill_prev();
70
71 t
72}
73
74#[inline]
76pub fn update_hash(current_hash: u16, to_insert: u8) -> u16 {
77 update_hash_conf(current_hash, to_insert, HASH_SHIFT, HASH_MASK)
78}
79
80#[inline]
81fn update_hash_conf(current_hash: u16, to_insert: u8, shift: u16, mask: u16) -> u16 {
82 ((current_hash << shift) ^ (to_insert as u16)) & mask
83}
84
85#[inline]
86fn reset_array(arr: &mut [u16; WINDOW_SIZE]) {
87 for (n, b) in arr.iter_mut().enumerate() {
88 *b = n as u16;
89 }
90}
91
92pub struct ChainedHashTable {
93 current_hash: u16,
95 c: Box<Tables>,
97 }
100
101impl ChainedHashTable {
102 pub fn new() -> ChainedHashTable {
103 ChainedHashTable {
104 current_hash: 0,
105 c: create_tables(),
106 }
108 }
109
110 #[cfg(test)]
111 pub fn from_starting_values(v1: u8, v2: u8) -> ChainedHashTable {
112 let mut t = ChainedHashTable::new();
113 t.current_hash = update_hash(t.current_hash, v1);
114 t.current_hash = update_hash(t.current_hash, v2);
115 t
116 }
117
118 pub fn reset(&mut self) {
120 self.current_hash = 0;
121 reset_array(&mut self.c.head);
122 {
123 let h = self.c.head;
124 let mut c = self.c.prev;
125 c[..].copy_from_slice(&h[..]);
126 }
127 }
131
132 pub fn add_initial_hash_values(&mut self, v1: u8, v2: u8) {
133 self.current_hash = update_hash(self.current_hash, v1);
134 self.current_hash = update_hash(self.current_hash, v2);
135 }
136
137 #[inline]
139 pub fn add_hash_value(&mut self, position: usize, value: u8) {
140 debug_assert!(
147 position < WINDOW_SIZE * 2,
148 "Position is larger than 2 * window size! {}",
149 position
150 );
151 let new_hash = update_hash(self.current_hash, value);
154
155 self.add_with_hash(position, new_hash);
156
157 self.current_hash = new_hash;
159 }
160
161 #[inline]
163 pub fn set_hash(&mut self, hash: u16) {
164 self.current_hash = hash;
165 }
166
167 #[inline]
169 pub fn add_with_hash(&mut self, position: usize, hash: u16) {
170 self.c.prev[position & WINDOW_MASK] = self.c.head[hash as usize];
175
176 self.c.head[hash as usize] = position as u16;
179 }
180
181 #[cfg(test)]
183 #[inline]
184 pub fn current_head(&self) -> u16 {
185 self.c.head[self.current_hash as usize]
186 }
187
188 #[inline]
189 pub fn current_hash(&self) -> u16 {
190 self.current_hash
191 }
192
193 #[inline]
194 pub fn get_prev(&self, bytes: usize) -> u16 {
195 self.c.prev[bytes & WINDOW_MASK]
196 }
197
198 #[cfg(test)]
199 #[inline]
200 pub fn farthest_next(&self, match_pos: usize, match_len: usize) -> usize {
201 let to_check = match_len.saturating_sub(2);
202
203 let mut n = 0;
204 let mut smallest_prev =
205 self.get_prev(match_pos);
206 let mut smallest_pos = 0;
207 while n < to_check {
208 let prev =
209 self.get_prev(match_pos + n);
210 if prev < smallest_prev {
211 smallest_prev = prev;
212 smallest_pos = n;
213 }
214 n += 1;
215 }
216 smallest_pos
217 }
218
219 #[inline]
220 fn slide_value(b: u16, pos: u16, bytes: u16) -> u16 {
221 if b >= bytes {
222 b - bytes
223 } else {
224 pos
225 }
226 }
227
228 #[inline]
229 fn slide_table(table: &mut [u16; WINDOW_SIZE], bytes: u16) {
230 for (n, b) in table.iter_mut().enumerate() {
231 *b = ChainedHashTable::slide_value(*b, n as u16, bytes);
232 }
233 }
234
235 pub fn slide(&mut self, bytes: usize) {
236 ChainedHashTable::slide_table(&mut self.c.head, bytes as u16);
241 ChainedHashTable::slide_table(&mut self.c.prev, bytes as u16);
242 }
243}
244
245#[cfg(test)]
246pub fn filled_hash_table(data: &[u8]) -> ChainedHashTable {
247 assert!(data.len() <= (WINDOW_SIZE * 2) + 2);
248 let mut hash_table = ChainedHashTable::from_starting_values(data[0], data[1]);
249 for (n, b) in data[2..].iter().enumerate() {
250 hash_table.add_hash_value(n, *b);
251 }
252 hash_table
253}
254
255#[cfg(test)]
256mod test {
257 use super::{filled_hash_table, ChainedHashTable};
258
259 #[test]
260 fn chained_hash() {
261 use std::str;
262
263 let test_string = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do \
264 eiusmod tempor. rum. incididunt ut labore et dolore magna aliqua. Ut \
265 enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi \
266 ut aliquip ex ea commodo consequat. rum. Duis aute irure dolor in \
267 reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
268 pariatur. Excepteur sint occaecat cupidatat non proident, sunt in \
269 culpa qui officia deserunt mollit anim id est laborum.";
270
271 let test_data = test_string.as_bytes();
272
273 let current_bytes = &test_data[test_data.len() - super::HASH_BYTES..test_data.len()];
274
275 let num_iters = test_string
276 .matches(str::from_utf8(current_bytes).unwrap())
277 .count();
278
279 let hash_table = filled_hash_table(test_data);
280
281 let mut prev_value = hash_table.get_prev(hash_table.current_head() as usize) as usize;
283 let mut count = 0;
284 let mut current = hash_table.current_head() as usize;
285 while current != prev_value {
286 count += 1;
287 current = prev_value;
288 prev_value = hash_table.get_prev(prev_value) as usize;
289 }
290 assert!(count >= num_iters);
294 }
295
296 #[test]
297 fn table_unique() {
298 let mut test_data = Vec::new();
299 test_data.extend(0u8..255);
300 test_data.extend(255u8..0);
301 let hash_table = filled_hash_table(&test_data);
302 let prev_pos = hash_table.get_prev(hash_table.current_head() as usize);
303 assert_eq!(prev_pos, hash_table.current_hash());
305 }
306
307 #[test]
308 fn table_slide() {
309 use std::fs::File;
310 use std::io::Read;
311
312 let window_size = super::WINDOW_SIZE;
313 let window_size16 = super::WINDOW_SIZE as u16;
314
315 let mut input = Vec::new();
316
317 let mut f = File::open("tests/pg11.txt").unwrap();
318
319 f.read_to_end(&mut input).unwrap();
320
321 let mut hash_table = filled_hash_table(&input[..window_size + 2]);
322
323 for (n, b) in input[2..window_size + 2].iter().enumerate() {
324 hash_table.add_hash_value(n + window_size, *b);
325 }
326
327 hash_table.slide(window_size);
328
329 {
330 let max_head = hash_table.c.head.iter().max().unwrap();
331 assert!(*max_head < window_size16);
334 assert!(*max_head > 0);
335 let pos = hash_table.get_prev(hash_table.current_head() as usize);
336 assert!(pos < window_size16);
338 assert!(pos > 0);
339 }
340
341 for (n, b) in input[2..(window_size / 2)].iter().enumerate() {
342 hash_table.add_hash_value(n + window_size, *b);
343 }
344
345 let max_prev = hash_table.c.prev.iter().max().unwrap();
348 assert!(*max_prev > window_size16);
349
350 let mut pos = hash_table.current_head();
351 assert!(pos > window_size16);
353 let end_byte = input[(window_size / 2) - 1 - 2];
354 let mut iterations = 0;
355 while pos > window_size16 && iterations < 5000 {
356 assert_eq!(input[pos as usize & window_size - 1], end_byte);
357
358 pos = hash_table.get_prev(pos as usize);
359 iterations += 1;
360 }
361 }
362
363 #[test]
364 fn initial_chains() {
366 let t = ChainedHashTable::new();
367 for (n, &b) in t.c.head.iter().enumerate() {
368 assert_eq!(n, b as usize);
369 }
370 for (n, &b) in t.c.prev.iter().enumerate() {
371 assert_eq!(n, b as usize);
372 }
373 }
374}