fuchsia_archive/
lib.rs

1// Copyright 2020 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! # Reading, Writing and Listing Fuchsia Archives (FAR) Data
6//!
7//! This crate is a Rust port of the
8//! [Go Far package](https://fuchsia.googlesource.com/fuchsia/+/HEAD/garnet/go/src/far/).
9//!
10//! # Example
11//!
12//! ```
13//! use anyhow::Error;
14//! use std::collections::BTreeMap;
15//! use std::fs;
16//! use std::io::{Cursor, Read, Write};
17//! use tempfile::TempDir;
18//!
19//! fn create_test_files(file_names: &[&str]) -> Result<TempDir, Error> {
20//!     let tmp_dir = TempDir::new()?;
21//!     for file_name in file_names {
22//!         let file_path = tmp_dir.path().join(file_name);
23//!         let parent_dir = file_path.parent().unwrap();
24//!         fs::create_dir_all(&parent_dir)?;
25//!         let file_path = tmp_dir.path().join(file_name);
26//!         let mut tmp_file = fs::File::create(&file_path)?;
27//!         writeln!(tmp_file, "{}", file_name)?;
28//!     }
29//!     Ok(tmp_dir)
30//! }
31//!
32//! let file_names = ["b", "a", "dir/c"];
33//! let test_dir = create_test_files(&file_names).unwrap();
34//! let mut path_content_map: BTreeMap<&str, (u64, Box<dyn Read>)> = BTreeMap::new();
35//! for file_name in file_names.iter() {
36//!     let file = fs::File::open(test_dir.path().join(file_name)).unwrap();
37//!     path_content_map.insert(file_name, (file.metadata().unwrap().len(), Box::new(file)));
38//! }
39//! let mut result = Vec::new();
40//! fuchsia_archive::write(&mut result, path_content_map).unwrap();
41//! let result = &result[..];
42//!
43//! let reader = fuchsia_archive::Utf8Reader::new(Cursor::new(result)).unwrap();
44//! let entries = reader.list().map(|e| e.path()).collect::<Vec<_>>();
45//! assert_eq!(entries, ["a", "b", "dir/c"]);
46//! ```
47
48#![allow(clippy::let_unit_value)]
49// TODO(https://fxbug.dev/42073005): Remove this allow once the lint is fixed.
50#![allow(unknown_lints, clippy::extra_unused_type_parameters)]
51
52use zerocopy::byteorder::little_endian::{U16, U32, U64};
53
54mod error;
55pub use error::Error;
56
57mod name;
58
59mod read;
60pub use read::Reader;
61
62mod utf8_reader;
63pub use utf8_reader::Utf8Reader;
64
65mod async_read;
66pub use async_read::AsyncReader;
67
68mod async_utf8_reader;
69pub use async_utf8_reader::AsyncUtf8Reader;
70
71mod write;
72pub use write::write;
73
/// Magic bytes stored in the `magic` field of the index chunk at the start of an archive.
pub const MAGIC_INDEX_VALUE: [u8; 8] = *b"\xc8\xbf\x0b\x48\xad\xab\xc5\x11";

/// Eight-byte tag naming the type of a chunk listed in the archive index.
pub type ChunkType = [u8; 8];

/// Chunk type of the directory chunk.
pub const DIR_CHUNK_TYPE: ChunkType = *b"DIR-----";
/// Chunk type of the directory names chunk.
pub const DIR_NAMES_CHUNK_TYPE: ChunkType = *b"DIRNAMES";
80
81#[derive(
82    PartialEq,
83    Eq,
84    Debug,
85    Clone,
86    Copy,
87    Default,
88    zerocopy::IntoBytes,
89    zerocopy::FromBytes,
90    zerocopy::KnownLayout,
91    zerocopy::Immutable,
92)]
93#[repr(C)]
94struct Index {
95    magic: [u8; 8],
96    length: U64,
97}
98
99const INDEX_LEN: u64 = std::mem::size_of::<Index>() as u64;
100
101#[derive(
102    PartialEq,
103    Eq,
104    Debug,
105    Clone,
106    Copy,
107    Default,
108    zerocopy::IntoBytes,
109    zerocopy::FromBytes,
110    zerocopy::KnownLayout,
111    zerocopy::Immutable,
112)]
113#[repr(C)]
114struct IndexEntry {
115    chunk_type: ChunkType,
116    offset: U64,
117    length: U64,
118}
119
120const INDEX_ENTRY_LEN: u64 = std::mem::size_of::<IndexEntry>() as u64;
121
122#[derive(
123    PartialEq,
124    Eq,
125    Debug,
126    Clone,
127    Copy,
128    Default,
129    zerocopy::IntoBytes,
130    zerocopy::FromBytes,
131    zerocopy::KnownLayout,
132    zerocopy::Immutable,
133)]
134#[repr(C)]
135struct DirectoryEntry {
136    name_offset: U32,
137    name_length: U16,
138    reserved: U16,
139    data_offset: U64,
140    data_length: U64,
141    reserved2: U64,
142}
143
144const DIRECTORY_ENTRY_LEN: u64 = std::mem::size_of::<DirectoryEntry>() as u64;
145const CONTENT_ALIGNMENT: u64 = 4096;
146
/// Describes a single item stored in an archive, as returned by Reader::list.
///
/// The path is kept as raw bytes borrowed for the lifetime `'a`.
#[derive(Debug, Eq, PartialEq)]
pub struct Entry<'a> {
    path: &'a [u8],
    offset: u64,
    length: u64,
}

impl<'a> Entry<'a> {
    /// Returns the entry's path as raw bytes.
    pub fn path(&self) -> &'a [u8] {
        self.path
    }

    /// Returns the offset, in bytes, of the entry's content chunk.
    pub fn offset(&self) -> u64 {
        self.offset
    }

    /// Returns the length, in bytes, of the entry's content chunk.
    pub fn length(&self) -> u64 {
        self.length
    }
}
171
/// Describes a single item stored in a UTF-8 archive.
///
/// Carries the same information as [`Entry`], but the path is exposed as a `&str` borrowed for
/// the lifetime `'a`.
#[derive(Debug, Eq, PartialEq)]
pub struct Utf8Entry<'a> {
    path: &'a str,
    offset: u64,
    length: u64,
}

impl<'a> Utf8Entry<'a> {
    /// Returns the entry's path.
    pub fn path(&self) -> &'a str {
        self.path
    }

    /// Returns the offset, in bytes, of the entry's content chunk.
    pub fn offset(&self) -> u64 {
        self.offset
    }

    /// Returns the length, in bytes, of the entry's content chunk.
    pub fn length(&self) -> u64 {
        self.length
    }
}
196
197fn validate_directory_entries_and_paths(
198    directory_entries: &[DirectoryEntry],
199    path_data: &[u8],
200    stream_len: u64,
201    end_of_last_non_content_chunk: u64,
202) -> Result<(), Error> {
203    let mut previous_name: Option<&[u8]> = None;
204    let mut previous_entry: Option<&DirectoryEntry> = None;
205    for (i, entry) in directory_entries.iter().enumerate() {
206        let name = validate_name_for_entry(entry, i, path_data, previous_name)?;
207        let () = validate_content_chunk(
208            entry,
209            previous_entry,
210            name,
211            stream_len,
212            end_of_last_non_content_chunk,
213        )?;
214        previous_name = Some(name);
215        previous_entry = Some(entry);
216    }
217    Ok(())
218}
219
220// Obtain name for current directory entry, making sure it is a valid name and lexicographically
221// greater than the previous name.
222fn validate_name_for_entry<'a>(
223    entry: &DirectoryEntry,
224    entry_index: usize,
225    path_data: &'a [u8],
226    previous_name: Option<&[u8]>,
227) -> Result<&'a [u8], Error> {
228    let offset = entry.name_offset.get().into_usize();
229    if offset >= path_data.len() {
230        return Err(Error::PathDataOffsetTooLarge {
231            entry_index,
232            offset,
233            chunk_size: path_data.len(),
234        });
235    }
236
237    let end = offset + usize::from(entry.name_length.get());
238    if end > path_data.len() {
239        return Err(Error::PathDataLengthTooLarge {
240            entry_index,
241            offset,
242            length: entry.name_length.get(),
243            chunk_size: path_data.len(),
244        });
245    }
246
247    let name = crate::name::validate_name(&path_data[offset..end])?;
248
249    // Directory entries must be strictly increasing by name
250    if let Some(previous_name) = previous_name {
251        if previous_name >= name {
252            return Err(Error::DirectoryEntriesOutOfOrder {
253                entry_index,
254                previous_name: previous_name.into(),
255                name: name.into(),
256            });
257        }
258    }
259    Ok(name)
260}
261
262fn validate_content_chunk(
263    entry: &DirectoryEntry,
264    previous_entry: Option<&DirectoryEntry>,
265    name: &[u8],
266    stream_len: u64,
267    end_of_last_non_content_chunk: u64,
268) -> Result<(), Error> {
269    // Chunks must be non-overlapping and tightly packed
270    let expected_offset = if let Some(previous_entry) = previous_entry {
271        // Both the addition and rounding were checked when the previous entry was validated.
272        (previous_entry.data_offset.get() + previous_entry.data_length.get())
273            .next_multiple_of(CONTENT_ALIGNMENT)
274    } else {
275        end_of_last_non_content_chunk
276            .checked_next_multiple_of(CONTENT_ALIGNMENT)
277            .ok_or(Error::ContentChunkOffsetOverflow)?
278    };
279    if entry.data_offset.get() != expected_offset {
280        return Err(Error::InvalidContentChunkOffset {
281            name: name.into(),
282            expected: expected_offset,
283            actual: entry.data_offset.get(),
284        });
285    }
286
287    // Chunks must be contained in the archive
288    let stream_len_lower_bound = entry
289        .data_offset
290        .get()
291        .checked_add(entry.data_length.get())
292        .and_then(|end| end.checked_next_multiple_of(CONTENT_ALIGNMENT))
293        .ok_or_else(|| Error::ContentChunkEndOverflow {
294            name: name.into(),
295            offset: entry.data_offset.get(),
296            length: entry.data_length.get(),
297        })?;
298    if stream_len_lower_bound > stream_len {
299        return Err(Error::ContentChunkBeyondArchive {
300            name: name.into(),
301            lower_bound: stream_len_lower_bound,
302            archive_size: stream_len,
303        });
304    }
305    Ok(())
306}
307
308// Return an iterator over the items in an archive.
309fn list<'a>(
310    directory_entries: &'a [DirectoryEntry],
311    path_data: &'a [u8],
312) -> impl ExactSizeIterator<Item = Entry<'a>> {
313    directory_entries.iter().map(|e| Entry {
314        path: &path_data[e.name_offset.get().into_usize()..][..usize::from(e.name_length.get())],
315        offset: e.data_offset.get(),
316        length: e.data_length.get(),
317    })
318}
319
320// Returns the directory entry with path `target_path`, or an error if there is not one.
321// O(log(# directory entries))
322fn find_directory_entry<'a>(
323    directory_entries: &'a [DirectoryEntry],
324    path_data: &'_ [u8],
325    target_path: &'_ [u8],
326) -> Result<&'a DirectoryEntry, Error> {
327    // FAR spec requires, and [Async]Reader::new enforces, that directory entries are sorted by
328    // path data
329    // https://fuchsia.dev/fuchsia-src/development/source_code/archive_format?hl=en#directory_chunk_type_dir-----
330    let i = directory_entries
331        .binary_search_by_key(&target_path, |e| {
332            &path_data[e.name_offset.get().into_usize()..][..usize::from(e.name_length.get())]
333        })
334        .map_err(|_| Error::PathNotPresent(target_path.into()))?;
335    Ok(directory_entries.get(i).expect("binary_search on success returns in-bounds index"))
336}
337
// Conversion to `usize` that is guaranteed at compile time not to truncate.
trait SafeIntegerConversion {
    // Converts `self` to `usize` without losing any bits.
    fn into_usize(self) -> usize;
}

impl SafeIntegerConversion for u32 {
    fn into_usize(self) -> usize {
        // Compile-time guard: the build fails on any target whose `usize` is narrower than
        // `u32`, so the cast below can never truncate. Evaluated by the compiler only; it has
        // no runtime cost.
        const _: () = assert!(std::mem::size_of::<u32>() <= std::mem::size_of::<usize>());
        self as usize
    }
}
350
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use std::io::{Cursor, Read as _, Seek as _, SeekFrom, Write as _};
    use zerocopy::IntoBytes as _;

    /// Builds a 16384-byte archive holding the files "a", "b" and "dir/c".
    ///
    /// The index, directory and directory names chunks occupy the first bytes; each file's
    /// content starts on its own 4096-byte boundary.
    pub(crate) fn example_archive() -> Vec<u8> {
        #[rustfmt::skip]
        let header = [
            /* magic */
            0xc8, 0xbf, 0x0b, 0x48, 0xad, 0xab, 0xc5, 0x11,
            /* length of index entries */
            0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* index entry for directory chunk */
            /* chunk type */
            0x44, 0x49, 0x52, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d,
            /* offset to chunk */
            0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* length of chunk */
            0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* index entry for directory names chunk */
            /* chunk type */
            0x44, 0x49, 0x52, 0x4e, 0x41, 0x4d, 0x45, 0x53,
            /* offset to chunk */
            0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* length of chunk */
            0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* directory chunk */
            /* directory table entry for path "a" */
            0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
            0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* directory table entry for path "b" */
            0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
            0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* directory table entry for path "dir/c" */
            0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
            0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            /* directory names chunk with one byte of padding */
            b'a', b'b', b'd', b'i', b'r', b'/', b'c', 0x00,
        ];

        let mut archive: Vec<u8> = vec![0; 16384];
        archive[..header.len()].copy_from_slice(&header);

        // (offset of the content chunk, bytes of the content chunk)
        let contents: [(usize, &[u8]); 3] =
            [(4096, &b"a\n"[..]), (8192, &b"b\n"[..]), (12288, &b"dir/c\n"[..])];
        for (start, bytes) in contents {
            archive[start..start + bytes.len()].copy_from_slice(bytes);
        }
        archive
    }

    #[test]
    fn test_serialize_deserialize_index() {
        let original = Index { magic: MAGIC_INDEX_VALUE, length: (2 * INDEX_ENTRY_LEN).into() };

        let mut buffer = Cursor::new(Vec::new());
        buffer.write_all(original.as_bytes()).unwrap();
        assert_eq!(buffer.get_ref().len() as u64, INDEX_LEN);

        // Rewind so the read below starts at the first serialized byte.
        assert_eq!(buffer.seek(SeekFrom::Start(0)).unwrap(), 0);
        let mut round_tripped = Index::default();
        buffer.read_exact(round_tripped.as_mut_bytes()).unwrap();
        assert_eq!(original, round_tripped);
    }

    #[test]
    fn test_serialize_deserialize_index_entry() {
        let original =
            IndexEntry { chunk_type: DIR_CHUNK_TYPE, offset: 999.into(), length: 444.into() };

        let mut buffer = Cursor::new(Vec::new());
        buffer.write_all(original.as_bytes()).unwrap();
        assert_eq!(buffer.get_ref().len() as u64, INDEX_ENTRY_LEN);

        // Rewind so the read below starts at the first serialized byte.
        assert_eq!(buffer.seek(SeekFrom::Start(0)).unwrap(), 0);
        let mut round_tripped = IndexEntry::default();
        buffer.read_exact(round_tripped.as_mut_bytes()).unwrap();
        assert_eq!(original, round_tripped);
    }

    #[test]
    fn test_serialize_deserialize_directory_entry() {
        let original = DirectoryEntry {
            name_offset: 33.into(),
            name_length: 66.into(),
            reserved: 0.into(),
            data_offset: 99.into(),
            data_length: 1011.into(),
            reserved2: 0.into(),
        };

        let mut buffer = Cursor::new(Vec::new());
        buffer.write_all(original.as_bytes()).unwrap();
        assert_eq!(buffer.get_ref().len() as u64, DIRECTORY_ENTRY_LEN);

        // Rewind so the read below starts at the first serialized byte.
        assert_eq!(buffer.seek(SeekFrom::Start(0)).unwrap(), 0);
        let mut round_tripped = DirectoryEntry::default();
        buffer.read_exact(round_tripped.as_mut_bytes()).unwrap();
        assert_eq!(original, round_tripped);
    }

    #[test]
    fn test_struct_sizes() {
        // Each expected size is the sum of the widths of the struct's fields.
        assert_eq!(INDEX_LEN, 8 + 8);
        assert_eq!(INDEX_ENTRY_LEN, 8 + 8 + 8);
        assert_eq!(DIRECTORY_ENTRY_LEN, 4 + 2 + 2 + 8 + 8 + 8);
    }

    #[test]
    fn into_usize_no_panic() {
        let expected: usize = u32::MAX.try_into().unwrap();
        assert_eq!(u32::MAX.into_usize(), expected);
    }
}