sparse/
reader.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{deserialize_from, Chunk, SparseHeader};
6use anyhow::{ensure, Context, Result};
7use byteorder::{ByteOrder as _, LE};
8use std::io::{Read, Seek, SeekFrom};
9
/// SparseReader is an implementation of std::io::Read which transparently unpacks the underlying
/// sparse image as it is read.
/// If random access reads are not required, it is more performant to use `unsparse` to completely
/// unpack a sparse image.
pub struct SparseReader<R> {
    // The underlying sparse image.
    reader: R,
    // Current position within the logical (unsparsed) image.
    offset: u64,
    // Size of the logical (unsparsed) image, i.e. the sum of the output sizes of all chunks.
    size: u64,
    // All chunks of the image, in the order they were read from it.  The second field is the
    // offset into `reader` at which the payload of the chunk appears; it is `Some` only for Raw
    // chunks and `None` for every other chunk type.
    chunks: Vec<(Chunk, Option<u64>)>,
}
24
25impl<R: Read + Seek> SparseReader<R> {
26    /// Attempts to create a SparseReader from the given image.  Returns failure if the image is
27    /// malformed.
28    pub fn new(mut reader: R) -> Result<Self> {
29        let header: SparseHeader =
30            deserialize_from(&mut reader).context("Failed to read header")?;
31        ensure!(header.valid(), "Invalid header");
32        let num_chunks = header.total_chunks as usize;
33
34        let mut chunks = vec![];
35        let mut offset = 0;
36        for _ in 0..num_chunks {
37            let chunk = Chunk::read_metadata(&mut reader, offset, header.blk_sz)?;
38            let data_offset = if chunk.chunk_type() == crate::format::CHUNK_TYPE_RAW {
39                let data_offset = reader.stream_position()?;
40                // Skip past the data payload
41                reader.seek(SeekFrom::Current(chunk.output_size() as i64))?;
42                Some(data_offset)
43            } else {
44                None
45            };
46            offset += chunk.output_size() as u64;
47            chunks.push((chunk, data_offset));
48        }
49
50        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
51        Ok(Self { reader, offset: 0, size: offset, chunks })
52    }
53
54    /// Returns the index of the current chunk in `self.chunks`.
55    fn current_chunk(&self) -> Option<usize> {
56        let mut off = 0;
57        let mut i = 0;
58        for (chunk, _) in &self.chunks {
59            let size = chunk.output_size() as u64;
60            if self.offset >= off && self.offset < off + size {
61                return Some(i);
62            }
63            off += size;
64            i += 1;
65        }
66        None
67    }
68
69    pub fn is_sparse_file(reader: &mut R) -> Result<bool> {
70        let header: SparseHeader = deserialize_from(reader)?;
71        let res = header.valid();
72        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
73        Ok(res)
74    }
75
76    pub(crate) fn chunks(&self) -> &Vec<(Chunk, Option<u64>)> {
77        &self.chunks
78    }
79}
80
81// It's assumed that `reader` already points at the right offset to read from the chunk, and `buf`
82// won't read past the end of the chunk.
83// `output_offset` is the logical position in the output stream.
84fn read_from_chunk<R: Read + Seek>(
85    reader: &mut R,
86    chunk: &Chunk,
87    output_offset: u64,
88    buf: &mut [u8],
89) -> std::io::Result<usize> {
90    match chunk {
91        Chunk::Raw { .. } => reader.read(buf),
92        Chunk::Fill { value, .. } => {
93            let mut value_bytes = value.to_le_bytes();
94            value_bytes.rotate_left(output_offset as usize % std::mem::size_of::<u32>());
95            let value_rotated = LE::read_u32(&value_bytes);
96            // Safety: `std::slice::align_to_mut` requires that everything in the dst slice is a
97            // valid type, which is true when going from [u8; 4] to [u32; 1].
98            let (prefix, wholes, suffix) = unsafe { buf.align_to_mut::<u32>() };
99            prefix.copy_from_slice(&value_bytes[value_bytes.len() - prefix.len()..]);
100            wholes.fill(value_rotated);
101            suffix.copy_from_slice(&value_bytes[..suffix.len()]);
102            Ok(buf.len())
103        }
104        Chunk::DontCare { .. } => {
105            buf.fill(0);
106            Ok(buf.len())
107        }
108        _ => unreachable!(),
109    }
110}
111
112impl<R: Read + Seek> Read for SparseReader<R> {
113    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
114        let mut bytes_read = 0;
115        while bytes_read < buf.len() {
116            let current_chunk_idx = match self.current_chunk() {
117                Some(i) => i,
118                None => return Ok(bytes_read),
119            };
120            let (current_chunk, chunk_start_offset) = &self.chunks[current_chunk_idx];
121            let offset_in_chunk = self.offset - current_chunk.output_offset().unwrap();
122            debug_assert!(offset_in_chunk < current_chunk.output_size() as u64);
123            let to_read = std::cmp::min(
124                buf.len() - bytes_read,
125                current_chunk.output_size() as usize - offset_in_chunk as usize,
126            );
127            if let Some(offset) = chunk_start_offset {
128                self.reader.seek(SeekFrom::Start(*offset + offset_in_chunk))?;
129            }
130            let bytes_read_from_chunk = read_from_chunk(
131                &mut self.reader,
132                current_chunk,
133                self.offset,
134                &mut buf[bytes_read..bytes_read + to_read],
135            )?;
136            bytes_read += bytes_read_from_chunk;
137            self.offset += bytes_read_from_chunk as u64;
138        }
139        Ok(bytes_read)
140    }
141}
142
143impl<R: Read + Seek> Seek for SparseReader<R> {
144    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
145        self.offset = match pos {
146            SeekFrom::Start(pos) => pos,
147            SeekFrom::Current(delta) => self
148                .offset
149                .checked_add_signed(delta)
150                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
151            SeekFrom::End(delta) => self
152                .size
153                .checked_add_signed(delta)
154                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
155        };
156        Ok(self.offset)
157    }
158}
159
#[cfg(test)]
mod test {
    use crate::builder::{DataSource, SparseImageBuilder};
    use crate::reader::SparseReader;
    use rand::rngs::SmallRng;
    use rand::{RngCore, SeedableRng};
    use std::fs::File;
    use std::io::{Read as _, Seek as _, SeekFrom, Write as _};
    use tempfile::{NamedTempFile, TempDir, TempPath};

    /// Creates a new temporary file inside `dir`.
    fn new_temp_file(dir: &TempDir) -> File {
        NamedTempFile::new_in(dir).unwrap().into_file()
    }

    /// Rewinds a freshly written sparse image and opens it with a `SparseReader`.
    fn open_reader(mut sparse_file: File) -> SparseReader<Box<File>> {
        sparse_file.seek(SeekFrom::Start(0)).unwrap();
        SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader")
    }

    /// Returns an 8 KiB buffer whose byte at index `i` is `i` modulo 256.
    fn counting_bytes() -> Box<[u8; 8192]> {
        let mut bytes = Box::new([0u8; 8192]);
        for (index, byte) in bytes.iter_mut().enumerate() {
            *byte = index as u8;
        }
        bytes
    }

    /// Writes a large block of random bytes to a new temporary file in `dir`.  Returns the
    /// rewound file, the guard for its path, and a copy of the bytes that were written.
    fn random_data_file(dir: &TempDir) -> (File, TempPath, Vec<u8>) {
        let (mut file, temp_path) = NamedTempFile::new_in(dir).unwrap().into_parts();
        let mut data = vec![0u8; 100 * 4096];
        SmallRng::from_entropy().fill_bytes(&mut data);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();
        (file, temp_path, data)
    }

    /// Builds a sparse image consisting of an 8 KiB 0xff buffer, the contents of `file`, a
    /// 16 KiB skip, 1024 repetitions of `fill_value`, and a final 4 KiB skip, then opens it.
    fn multi_chunk_reader(
        dir: &TempDir,
        file: File,
        content_size: usize,
        fill_value: u32,
    ) -> SparseReader<Box<File>> {
        let mut sparse_file = new_temp_file(dir);
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_chunk(DataSource::Reader { reader: Box::new(file), size: content_size as u64 })
            .add_chunk(DataSource::Skip(16384))
            .add_chunk(DataSource::Fill(fill_value, 1024))
            .add_chunk(DataSource::Skip(4096))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        open_reader(sparse_file)
    }

    #[test]
    fn empty_reader() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = new_temp_file(&tmpdir);
        SparseImageBuilder::new().build(&mut sparse_file).expect("Build sparse image failed");
        let mut reader = open_reader(sparse_file);

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed_bytes.len(), 0);
    }

    #[test]
    fn is_sparse_file() {
        let tmpdir = TempDir::new().unwrap();

        // A genuine sparse image is recognized.
        let mut sparse_file = new_temp_file(&tmpdir);
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(counting_bytes()))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        sparse_file.seek(SeekFrom::Start(0)).unwrap();

        assert!(SparseReader::is_sparse_file(&mut sparse_file).expect("Should be a sparse file"));

        // Arbitrary bytes are not.
        let mut garbage_file = new_temp_file(&tmpdir);
        let garbage_data = vec![1; 4096];
        garbage_file.write_all(&garbage_data).expect("Writing garbage file");
        garbage_file.seek(SeekFrom::Start(0)).unwrap();

        assert!(!SparseReader::is_sparse_file(&mut garbage_file).unwrap());
    }

    #[test]
    fn seek() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = new_temp_file(&tmpdir);
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(counting_bytes()))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        let mut reader = open_reader(sparse_file);

        let mut buf = [0u8; 1];

        // (seek target, expected resulting offset, expected byte read at that offset).  Every
        // read advances the position by one, which the `Current` cases depend on.
        let in_bounds = [
            (SeekFrom::Start(0), 0u64, 0u8),
            (SeekFrom::Start(100), 100, 100),
            (SeekFrom::Current(-2), 99, 99),
            (SeekFrom::Current(0), 100, 100),
            (SeekFrom::Current(1), 102, 102),
            (SeekFrom::End(-1), 8191, 255),
        ];
        for &(target, expected_offset, expected_byte) in &in_bounds {
            assert_eq!(expected_offset, reader.seek(target).unwrap());
            assert_eq!(1, reader.read(&mut buf).unwrap());
            assert_eq!(buf[0], expected_byte);
        }

        // Seeking to or beyond the end is allowed, but there is nothing to read there.
        for &(target, expected_offset) in &[(SeekFrom::End(0), 8192u64), (SeekFrom::End(1), 8193)] {
            assert_eq!(expected_offset, reader.seek(target).unwrap());
            assert_eq!(0, reader.read(&mut buf).unwrap());
        }
    }

    #[test]
    fn read_past_eof() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = new_temp_file(&tmpdir);
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        let mut reader = open_reader(sparse_file);

        let mut buf = [0u8; 2];

        // Only one byte is left before the end of the image.
        reader.seek(SeekFrom::Start(8191)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 1);

        // Exactly at the end there is nothing left.
        reader.seek(SeekFrom::Start(8192)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 0);
    }

    #[test]
    fn full_read() {
        let tmpdir = TempDir::new().unwrap();

        // Generate a large temporary file
        let (file, _temp_path, data) = random_data_file(&tmpdir);
        let content_size = data.len();

        let mut reader = multi_chunk_reader(&tmpdir, file, content_size, 0xaaaa_aaaau32);

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");

        // End offsets of the consecutive sections of the unsparsed image.
        let buffer_end = 8192;
        let data_end = buffer_end + content_size;
        let first_skip_end = data_end + 16384;
        let fill_end = first_skip_end + 4096;

        assert_eq!(unsparsed_bytes.len(), fill_end + 4096);
        assert_eq!(&unsparsed_bytes[..buffer_end], &[0xffu8; 8192]);
        assert_eq!(&unsparsed_bytes[buffer_end..data_end], &data[..]);
        assert_eq!(&unsparsed_bytes[data_end..first_skip_end], &[0u8; 16384]);
        assert_eq!(&unsparsed_bytes[first_skip_end..fill_end], &[0xaau8; 4096]);
        assert_eq!(&unsparsed_bytes[fill_end..], &[0u8; 4096]);
    }

    #[test]
    fn unaligned_reads() {
        let tmpdir = TempDir::new().unwrap();

        // Generate a large temporary file
        let (file, _temp_path, data) = random_data_file(&tmpdir);
        let content_size = data.len();

        let mut reader = multi_chunk_reader(&tmpdir, file, content_size, 0x0102_0304u32);

        // Start offsets of the sections that follow the initial 8 KiB buffer.
        let data_start = 8192u64;
        let first_skip_start = data_start + content_size as u64;
        let fill_start = first_skip_start + 16384;
        let last_skip_start = fill_start + 4096;

        let mut buffer = [0u8; 4096];

        // Do an unaligned read from each section

        // DataSource::Buffer
        reader.seek(SeekFrom::Start(10)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..20]).expect("Failed to read");
        assert_eq!(&buffer[..20], &[0xffu8; 20]);

        // DataSource::File
        reader.seek(SeekFrom::Start(data_start + 4095)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..2]).expect("Failed to read");
        assert_eq!(&buffer[..2], &data[4095..4097]);

        // DataSource::Skip
        reader.seek(SeekFrom::Start(first_skip_start + 4090)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..6]).expect("Failed to read");
        assert_eq!(&buffer[..6], &[0u8; 6]);

        // DataSource::Fill
        reader.seek(SeekFrom::Start(fill_start + 3)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..9]).expect("Failed to read");
        // Bear in mind the byte ordering is LE, so 0x01020304 == [0x04, 0x03, 0x02, 0x01]
        assert_eq!(&buffer[..9], &[0x01, 0x04, 0x03, 0x02, 0x01, 0x04, 0x03, 0x02, 0x01]);

        // DataSource::Skip
        reader.seek(SeekFrom::Start(last_skip_start + 1)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..4095]).expect("Failed to read");
        assert_eq!(&buffer[..4095], &[0u8; 4095]);

        // Do an unaligned read spanning two sections (the last Fill and Skip)
        reader.seek(SeekFrom::Start(fill_start + 4090)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..9]).expect("Failed to read");
        assert_eq!(&buffer[..9], &[0x02, 0x01, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00]);
    }
}