sparse/
reader.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{Chunk, SparseHeader, deserialize_from};
6use anyhow::{Context, Result, ensure};
7use byteorder::{ByteOrder as _, LE};
8use std::io::{Read, Seek, SeekFrom};
9
/// SparseReader is an implementation of std::io::Read which transparently unpacks the underlying
/// sparse image as it is read.  It also implements std::io::Seek, with positions expressed in
/// terms of the logical (unsparsed) image.
/// If random access reads are not required, it is more performant to use `unsparse` to completely
/// unpack a sparse image.
pub struct SparseReader<R> {
    // The underlying stream holding the sparse image.
    reader: R,
    // Offset into the logical (unsparsed) image.
    offset: u64,
    // Size of the logical (unsparsed) image, i.e. the sum of the output sizes of all chunks.
    size: u64,
    // All chunks of the image, in the order they were read from `reader`.
    // The second field is the offset into `reader` at which the payload of the chunk appears, for
    // Raw chunks.  It is `None` for every other chunk type.
    chunks: Vec<(Chunk, Option<u64>)>,
    // The block size of each chunk, as recorded in the image header.
    block_size: u32,
}
26
27impl<R: Read + Seek> SparseReader<R> {
28    /// Attempts to create a SparseReader from the given image.  Returns failure if the image is
29    /// malformed.
30    pub fn new(mut reader: R) -> Result<Self> {
31        let header: SparseHeader =
32            deserialize_from(&mut reader).context("Failed to read header")?;
33        ensure!(header.valid(), "Invalid header");
34        let num_chunks = header.total_chunks as usize;
35
36        let mut chunks = vec![];
37        let mut offset = 0;
38        for _ in 0..num_chunks {
39            let chunk = Chunk::read_metadata(&mut reader, offset, header.blk_sz)?;
40            let data_offset = if chunk.chunk_type() == crate::format::CHUNK_TYPE_RAW {
41                let data_offset = reader.stream_position()?;
42                // Skip past the data payload
43                reader.seek(SeekFrom::Current(chunk.output_size() as i64))?;
44                Some(data_offset)
45            } else {
46                None
47            };
48            offset += chunk.output_size() as u64;
49            chunks.push((chunk, data_offset));
50        }
51
52        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
53        Ok(Self { reader, offset: 0, size: offset, chunks, block_size: header.blk_sz })
54    }
55
56    /// Returns the index of the current chunk in `self.chunks`.
57    fn current_chunk(&self) -> Option<usize> {
58        let mut off = 0;
59        let mut i = 0;
60        for (chunk, _) in &self.chunks {
61            let size = chunk.output_size() as u64;
62            if self.offset >= off && self.offset < off + size {
63                return Some(i);
64            }
65            off += size;
66            i += 1;
67        }
68        None
69    }
70
71    pub fn is_sparse_file(reader: &mut R) -> Result<bool> {
72        let header: SparseHeader = deserialize_from(reader)?;
73        let res = header.valid();
74        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
75        Ok(res)
76    }
77
78    pub fn chunks(&self) -> &Vec<(Chunk, Option<u64>)> {
79        &self.chunks
80    }
81
82    pub fn unsparsed_size(&self) -> u64 {
83        self.size
84    }
85
86    pub fn block_size(&self) -> u32 {
87        self.block_size
88    }
89}
90
91// It's assumed that `reader` already points at the right offset to read from the chunk, and `buf`
92// won't read past the end of the chunk.
93// `output_offset` is the logical position in the output stream.
94fn read_from_chunk<R: Read + Seek>(
95    reader: &mut R,
96    chunk: &Chunk,
97    output_offset: u64,
98    buf: &mut [u8],
99) -> std::io::Result<usize> {
100    match chunk {
101        Chunk::Raw { .. } => reader.read(buf),
102        Chunk::Fill { value, .. } => {
103            let mut value_bytes = value.to_le_bytes();
104            value_bytes.rotate_left(output_offset as usize % std::mem::size_of::<u32>());
105            let value_rotated = LE::read_u32(&value_bytes);
106            // Safety: `std::slice::align_to_mut` requires that everything in the dst slice is a
107            // valid type, which is true when going from [u8; 4] to [u32; 1].
108            let (prefix, wholes, suffix) = unsafe { buf.align_to_mut::<u32>() };
109            prefix.copy_from_slice(&value_bytes[value_bytes.len() - prefix.len()..]);
110            wholes.fill(value_rotated);
111            suffix.copy_from_slice(&value_bytes[..suffix.len()]);
112            Ok(buf.len())
113        }
114        Chunk::DontCare { .. } => {
115            buf.fill(0);
116            Ok(buf.len())
117        }
118        _ => unreachable!(),
119    }
120}
121
122impl<R: Read + Seek> Read for SparseReader<R> {
123    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
124        let mut bytes_read = 0;
125        while bytes_read < buf.len() {
126            let current_chunk_idx = match self.current_chunk() {
127                Some(i) => i,
128                None => return Ok(bytes_read),
129            };
130            let (current_chunk, chunk_start_offset) = &self.chunks[current_chunk_idx];
131            let offset_in_chunk = self.offset - current_chunk.output_offset().unwrap();
132            debug_assert!(offset_in_chunk < current_chunk.output_size() as u64);
133            let to_read = std::cmp::min(
134                buf.len() - bytes_read,
135                current_chunk.output_size() as usize - offset_in_chunk as usize,
136            );
137            if let Some(offset) = chunk_start_offset {
138                self.reader.seek(SeekFrom::Start(*offset + offset_in_chunk))?;
139            }
140            let bytes_read_from_chunk = read_from_chunk(
141                &mut self.reader,
142                current_chunk,
143                self.offset,
144                &mut buf[bytes_read..bytes_read + to_read],
145            )?;
146            bytes_read += bytes_read_from_chunk;
147            self.offset += bytes_read_from_chunk as u64;
148        }
149        Ok(bytes_read)
150    }
151}
152
153impl<R: Read + Seek> Seek for SparseReader<R> {
154    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
155        self.offset = match pos {
156            SeekFrom::Start(pos) => pos,
157            SeekFrom::Current(delta) => self
158                .offset
159                .checked_add_signed(delta)
160                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
161            SeekFrom::End(delta) => self
162                .size
163                .checked_add_signed(delta)
164                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
165        };
166        Ok(self.offset)
167    }
168}
169
#[cfg(test)]
mod test {
    use crate::builder::{DataSource, SparseImageBuilder};
    use crate::reader::SparseReader;
    use rand::rngs::SmallRng;
    use rand::{RngCore, SeedableRng};
    use std::fs::File;
    use std::io::{Read as _, Seek as _, SeekFrom, Write as _};
    use tempfile::{NamedTempFile, TempDir, TempPath};

    /// Returns 8192 bytes in which the byte at index `i` has the value `i % 256`.
    fn counting_bytes() -> Box<[u8; 8192]> {
        let mut data = Box::new([0u8; 8192]);
        for (i, d) in data.iter_mut().enumerate() {
            // Truncation is intended: the pattern wraps every 256 bytes.
            *d = i as u8;
        }
        data
    }

    /// Rewinds a freshly built sparse image and wraps it in a `SparseReader`.
    fn open_reader(mut sparse_file: File) -> SparseReader<Box<File>> {
        sparse_file.seek(SeekFrom::Start(0)).unwrap();
        SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader")
    }

    /// Builds a sparse image made of, in order: 8192 bytes of 0xff, 100 blocks of random data
    /// served from a temporary file, a 16384 byte skip, 1024 repetitions of `fill`, and a 4096
    /// byte skip.
    ///
    /// Returns a reader over the image, the path guard of the temporary data file (which the
    /// caller should keep alive for the duration of the test) and the random data itself.
    fn mixed_image_reader(
        tmpdir: &TempDir,
        fill: u32,
    ) -> (SparseReader<Box<File>>, TempPath, Vec<u8>) {
        // Generate a large temporary file
        let (mut file, temp_path) = NamedTempFile::new_in(tmpdir).unwrap().into_parts();
        let mut rng = SmallRng::from_os_rng();
        let mut data = vec![0u8; 100 * 4096];
        rng.fill_bytes(&mut data);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();

        let mut sparse_file = NamedTempFile::new_in(tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_source(DataSource::Reader { reader: Box::new(file), size: data.len() as u64 })
            .add_source(DataSource::Skip(16384))
            .add_source(DataSource::Fill(fill, 1024))
            .add_source(DataSource::Skip(4096))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");

        (open_reader(sparse_file), temp_path, data)
    }

    #[test]
    fn empty_reader() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new().build(&mut sparse_file).expect("Build sparse image failed");

        let mut reader = open_reader(sparse_file);

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed_bytes.len(), 0);
    }

    #[test]
    fn is_sparse_file() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(counting_bytes()))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        sparse_file.seek(SeekFrom::Start(0)).unwrap();

        assert!(SparseReader::is_sparse_file(&mut sparse_file).expect("Should be a sparse file"));
        // Probing must leave the stream where a subsequent consumer expects it.
        assert_eq!(0, sparse_file.stream_position().unwrap());

        let mut garbage_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        let garbage_data = vec![1; 4096];
        garbage_file.write_all(&garbage_data).expect("Writing garbage file");
        garbage_file.seek(SeekFrom::Start(0)).unwrap();

        assert!(!SparseReader::is_sparse_file(&mut garbage_file).unwrap());
        assert_eq!(0, garbage_file.stream_position().unwrap());
    }

    #[test]
    fn seek() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(counting_bytes()))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        let mut reader = open_reader(sparse_file);

        let mut buf = [0u8; 1];
        assert_eq!(0, reader.seek(SeekFrom::Start(0)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 0u8);

        assert_eq!(100, reader.seek(SeekFrom::Start(100)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 100u8);

        // Each successful one-byte read advances the offset by one, hence 101 - 2 == 99.
        assert_eq!(99, reader.seek(SeekFrom::Current(-2)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 99u8);

        assert_eq!(100, reader.seek(SeekFrom::Current(0)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 100u8);

        assert_eq!(102, reader.seek(SeekFrom::Current(1)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 102u8);

        assert_eq!(8191, reader.seek(SeekFrom::End(-1)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 255u8);

        assert_eq!(8192, reader.seek(SeekFrom::End(0)).unwrap());
        assert_eq!(0, reader.read(&mut buf).unwrap());

        // Seeking past the end is allowed; reads from there yield nothing.
        assert_eq!(8193, reader.seek(SeekFrom::End(1)).unwrap());
        assert_eq!(0, reader.read(&mut buf).unwrap());
    }

    #[test]
    fn read_past_eof() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");

        let mut reader = open_reader(sparse_file);

        let mut buf = [0u8; 2];

        reader.seek(SeekFrom::Start(8191)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 1);

        reader.seek(SeekFrom::Start(8192)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 0);
    }

    #[test]
    fn full_read() {
        let tmpdir = TempDir::new().unwrap();
        let (mut reader, _temp_path, data) = mixed_image_reader(&tmpdir, 0xaaaa_aaaau32);
        let content_size = data.len();

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed_bytes.len(), 8192 + content_size + 16384 + 4096 + 4096);
        assert_eq!(&unsparsed_bytes[..8192], &[0xffu8; 8192]);
        assert_eq!(&unsparsed_bytes[8192..8192 + content_size], &data[..]);
        assert_eq!(
            &unsparsed_bytes[8192 + content_size..8192 + content_size + 16384],
            &[0u8; 16384]
        );
        assert_eq!(
            &unsparsed_bytes[8192 + content_size + 16384..8192 + content_size + 16384 + 4096],
            &[0xaau8; 4096]
        );
        assert_eq!(&unsparsed_bytes[8192 + content_size + 16384 + 4096..], &[0u8; 4096]);
    }

    #[test]
    fn unaligned_reads() {
        let tmpdir = TempDir::new().unwrap();
        let (mut reader, _temp_path, data) = mixed_image_reader(&tmpdir, 0x0102_0304u32);
        let content_size = data.len();

        let mut buffer = [0u8; 4096];

        // Do an unaligned read from each section.  `read_exact` is used throughout so that a
        // short read fails the test instead of leaving stale bytes in `buffer` to be compared.

        // DataSource::Buffer
        reader.seek(SeekFrom::Start(10)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..20]).expect("Failed to read");
        assert_eq!(&buffer[..20], &[0xffu8; 20]);

        // DataSource::Reader
        reader.seek(SeekFrom::Start(8192 + 4095)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..2]).expect("Failed to read");
        assert_eq!(&buffer[..2], &data[4095..4097]);

        // DataSource::Skip
        reader.seek(SeekFrom::Start(8192 + content_size as u64 + 4090)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..6]).expect("Failed to read");
        assert_eq!(&buffer[..6], &[0u8; 6]);

        // DataSource::Fill
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 3))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..9]).expect("Failed to read");
        // Bear in mind the byte ordering is LE, so 0x01020304 == [0x04, 0x03, 0x02, 0x01]
        assert_eq!(&buffer[..9], &[0x01, 0x04, 0x03, 0x02, 0x01, 0x04, 0x03, 0x02, 0x01]);

        // DataSource::Skip
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 4096 + 1))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..4095]).expect("Failed to read");
        assert_eq!(&buffer[..4095], &[0u8; 4095]);

        // Do an unaligned read spanning two sections (the last Fill and Skip)
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 4090))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..9]).expect("Failed to read");
        assert_eq!(&buffer[..9], &[0x02, 0x01, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00]);
    }
}