fuchsia_archive/
read.rs

1// Copyright 2020 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{
6    DirectoryEntry, Error, Index, IndexEntry, DIRECTORY_ENTRY_LEN, DIR_CHUNK_TYPE,
7    DIR_NAMES_CHUNK_TYPE, INDEX_ENTRY_LEN, INDEX_LEN, MAGIC_INDEX_VALUE,
8};
9use std::convert::TryInto as _;
10use std::io::{Read, Seek, SeekFrom};
11use zerocopy::IntoBytes as _;
12
13/// A struct to open and read FAR-formatted archive.
14#[derive(Debug)]
15pub struct Reader<T>
16where
17    T: Read + Seek,
18{
19    source: T,
20    directory_entries: Box<[DirectoryEntry]>,
21    path_data: Box<[u8]>,
22}
23
24impl<T> Reader<T>
25where
26    T: Read + Seek,
27{
28    /// Create a new Reader for the provided source.
29    pub fn new(mut source: T) -> Result<Self, Error> {
30        let index = Self::read_index_header(&mut source)?;
31        let (dir_index, dir_name_index, end_of_last_non_content_chunk) =
32            Self::read_index_entries(&mut source, &index)?;
33        let stream_len = source.seek(SeekFrom::End(0)).map_err(Error::Seek)?;
34
35        // Read directory entries
36        if !dir_index.length.get().is_multiple_of(DIRECTORY_ENTRY_LEN) {
37            return Err(Error::InvalidDirectoryChunkLen(dir_index.length.get()));
38        }
39        let mut directory_entries =
40            vec![
41                DirectoryEntry::default();
42                (dir_index.length.get() / DIRECTORY_ENTRY_LEN)
43                    .try_into()
44                    .map_err(|_| { Error::InvalidDirectoryChunkLen(dir_index.length.get()) })?
45            ];
46        source.seek(SeekFrom::Start(dir_index.offset.get())).map_err(Error::Seek)?;
47        source.read_exact(directory_entries.as_mut_bytes()).map_err(Error::Read)?;
48        let directory_entries = directory_entries.into_boxed_slice();
49
50        // Read path data
51        if !dir_name_index.length.get().is_multiple_of(8)
52            || dir_name_index.length.get() > stream_len
53        {
54            return Err(Error::InvalidDirectoryNamesChunkLen(dir_name_index.length.get()));
55        }
56        let path_data_length = dir_name_index
57            .length
58            .get()
59            .try_into()
60            .map_err(|_| Error::InvalidDirectoryNamesChunkLen(dir_name_index.length.get()))?;
61        let mut path_data = vec![0; path_data_length];
62        source.seek(SeekFrom::Start(dir_name_index.offset.get())).map_err(Error::Seek)?;
63        source.read_exact(path_data.as_mut_slice()).map_err(Error::Read)?;
64        let path_data = path_data.into_boxed_slice();
65
66        let () = crate::validate_directory_entries_and_paths(
67            &directory_entries,
68            &path_data,
69            stream_len,
70            end_of_last_non_content_chunk,
71        )?;
72
73        Ok(Self { source, directory_entries, path_data })
74    }
75
76    // Assumes `source` cursor is at the beginning of the file.
77    fn read_index_header(source: &mut T) -> Result<Index, Error> {
78        let mut index = Index::default();
79        source.read_exact(index.as_mut_bytes()).map_err(Error::Read)?;
80        if index.magic != MAGIC_INDEX_VALUE {
81            Err(Error::InvalidMagic(index.magic))
82        } else if !index.length.get().is_multiple_of(INDEX_ENTRY_LEN)
83            || INDEX_LEN.checked_add(index.length.get()).is_none()
84        {
85            Err(Error::InvalidIndexEntriesLen(index.length.get()))
86        } else {
87            Ok(index)
88        }
89    }
90
91    // Returns (directory_index, directory_names_index, end_of_last_chunk).
92    // Assumes `source` cursor is at the beginning of the index entries.
93    fn read_index_entries(
94        source: &mut T,
95        index: &Index,
96    ) -> Result<(IndexEntry, IndexEntry, u64), Error> {
97        let mut dir_index: Option<IndexEntry> = None;
98        let mut dir_name_index: Option<IndexEntry> = None;
99        let mut previous_entry: Option<IndexEntry> = None;
100        for _ in 0..index.length.get() / INDEX_ENTRY_LEN {
101            let mut entry = IndexEntry::default();
102            source.read_exact(entry.as_mut_bytes()).map_err(Error::Read)?;
103
104            let expected_offset = if let Some(previous_entry) = previous_entry {
105                if previous_entry.chunk_type >= entry.chunk_type {
106                    return Err(Error::IndexEntriesOutOfOrder {
107                        prev: previous_entry.chunk_type,
108                        next: entry.chunk_type,
109                    });
110                }
111                previous_entry.offset.get() + previous_entry.length.get()
112            } else {
113                INDEX_LEN + index.length.get()
114            };
115            if entry.offset.get() != expected_offset {
116                return Err(Error::InvalidChunkOffset {
117                    chunk_type: entry.chunk_type,
118                    expected: expected_offset,
119                    actual: entry.offset.get(),
120                });
121            }
122            if entry.offset.get().checked_add(entry.length.get()).is_none() {
123                return Err(Error::InvalidChunkLength {
124                    chunk_type: entry.chunk_type,
125                    offset: entry.offset.get(),
126                    length: entry.length.get(),
127                });
128            }
129
130            match entry.chunk_type {
131                DIR_CHUNK_TYPE => {
132                    dir_index = Some(entry);
133                }
134                DIR_NAMES_CHUNK_TYPE => {
135                    dir_name_index = Some(entry);
136                }
137                // FAR spec does not forbid unknown chunk types
138                _ => {}
139            }
140            previous_entry = Some(entry);
141        }
142        let end_of_last_chunk = if let Some(previous_entry) = previous_entry {
143            previous_entry.offset.get() + previous_entry.length.get()
144        } else {
145            INDEX_LEN
146        };
147        Ok((
148            dir_index.ok_or(Error::MissingDirectoryChunkIndexEntry)?,
149            dir_name_index.ok_or(Error::MissingDirectoryNamesChunkIndexEntry)?,
150            end_of_last_chunk,
151        ))
152    }
153
154    /// Return a list of the items in the archive
155    pub fn list(&self) -> impl ExactSizeIterator<Item = crate::Entry<'_>> {
156        crate::list(&self.directory_entries, &self.path_data)
157    }
158
159    /// Read the entire contents of the entry with the specified path.
160    /// O(log(# directory entries))
161    pub fn read_file(&mut self, path: &[u8]) -> Result<Vec<u8>, Error> {
162        let entry = crate::find_directory_entry(&self.directory_entries, &self.path_data, path)?;
163        let mut data = vec![
164            0;
165            usize::try_from(entry.data_length.get()).map_err(|_| {
166                Error::ContentChunkDoesNotFitInMemory {
167                    name: path.into(),
168                    chunk_size: entry.data_length.get(),
169                }
170            })?
171        ];
172        let _: u64 =
173            self.source.seek(SeekFrom::Start(entry.data_offset.get())).map_err(Error::Seek)?;
174        let () = self.source.read_exact(&mut data).map_err(Error::Read)?;
175        Ok(data)
176    }
177
178    /// Get the size in bytes of the entry with the specified path.
179    /// O(log(# directory entries))
180    pub fn get_size(&mut self, path: &[u8]) -> Result<u64, Error> {
181        Ok(crate::find_directory_entry(&self.directory_entries, &self.path_data, path)?
182            .data_length
183            .get())
184    }
185}
186
187#[cfg(test)]
188mod tests {
189    use super::*;
190    use crate::tests::example_archive;
191    use assert_matches::assert_matches;
192    use std::io::Cursor;
193
// Listing the example archive yields its three entries in order.
#[test]
fn list() {
    let archive = example_archive();
    let reader = Reader::new(Cursor::new(&archive)).unwrap();
    let expected = [
        crate::Entry { path: b"a", offset: 4096, length: 2 },
        crate::Entry { path: b"b", offset: 8192, length: 2 },
        crate::Entry { path: b"dir/c", offset: 12288, length: 6 },
    ];
    itertools::assert_equal(reader.list(), expected);
}
207
// Each file in the example archive contains its own path followed by a newline.
#[test]
fn read_file() {
    let archive = example_archive();
    let mut reader = Reader::new(Cursor::new(&archive)).unwrap();
    for name in ["a", "b", "dir/c"] {
        let content = reader.read_file(name.as_bytes()).unwrap();
        let expected = format!("{name}\n");
        assert_eq!(std::str::from_utf8(&content).unwrap(), expected);
    }
}
219
// Each file in the example archive is one byte longer than its path.
#[test]
fn get_size() {
    let archive = example_archive();
    let mut reader = Reader::new(Cursor::new(&archive)).unwrap();
    for name in ["a", "b", "dir/c"] {
        let expected = u64::try_from(name.len() + 1).unwrap();
        assert_eq!(reader.get_size(name.as_bytes()).unwrap(), expected);
    }
}
230
// Both accessors report `PathNotPresent`, carrying the requested path, for an unknown path.
#[test]
fn accessors_error_on_missing_path() {
    let archive = example_archive();
    let mut reader = Reader::new(Cursor::new(&archive)).unwrap();
    let missing = b"missing-path";

    let read_result = reader.read_file(missing);
    assert_matches!(read_result, Err(Error::PathNotPresent(path)) if path == missing);

    let size_result = reader.get_size(missing);
    assert_matches!(size_result, Err(Error::PathNotPresent(path)) if path == missing);
}
244}