fuchsia_archive/
async_read.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{
6    DirectoryEntry, Error, Index, IndexEntry, DIRECTORY_ENTRY_LEN, DIR_CHUNK_TYPE,
7    DIR_NAMES_CHUNK_TYPE, INDEX_ENTRY_LEN, INDEX_LEN, MAGIC_INDEX_VALUE,
8};
9use fuchsia_fs::file::{AsyncGetSize, AsyncGetSizeExt, AsyncReadAt, AsyncReadAtExt};
10use std::convert::TryInto as _;
11use zerocopy::IntoBytes as _;
12
/// A struct to open and read a FAR-formatted archive asynchronously.
///
/// The directory entries and the directory names data are read and validated
/// once, when the reader is constructed; file contents are read from the
/// source on demand.
#[derive(Debug)]
pub struct AsyncReader<T>
where
    T: AsyncReadAt + AsyncGetSize + Unpin,
{
    /// The underlying source that the archive is read from.
    source: T,
    /// The entries read from the archive's directory chunk.
    directory_entries: Box<[DirectoryEntry]>,
    /// The raw bytes read from the archive's directory names chunk.
    path_data: Box<[u8]>,
}
23
24impl<T> AsyncReader<T>
25where
26    T: AsyncReadAt + AsyncGetSize + Unpin,
27{
28    /// Create a new AsyncReader for the provided source.
29    pub async fn new(mut source: T) -> Result<Self, Error> {
30        let index = Self::read_index_header(&mut source).await?;
31        let (dir_index, dir_name_index, end_of_last_non_content_chunk) =
32            Self::read_index_entries(&mut source, &index).await?;
33        let stream_len = source.get_size().await.map_err(Error::GetSize)?;
34
35        // Read directory entries
36        if !dir_index.length.get().is_multiple_of(DIRECTORY_ENTRY_LEN) {
37            return Err(Error::InvalidDirectoryChunkLen(dir_index.length.get()));
38        }
39        let mut directory_entries =
40            vec![
41                DirectoryEntry::default();
42                (dir_index.length.get() / DIRECTORY_ENTRY_LEN)
43                    .try_into()
44                    .map_err(|_| { Error::InvalidDirectoryChunkLen(dir_index.length.get()) })?
45            ];
46        source
47            .read_at_exact(dir_index.offset.get(), directory_entries.as_mut_bytes())
48            .await
49            .map_err(Error::Read)?;
50        let directory_entries = directory_entries.into_boxed_slice();
51
52        // Read path data
53        if !dir_name_index.length.get().is_multiple_of(8)
54            || dir_name_index.length.get() > stream_len
55        {
56            return Err(Error::InvalidDirectoryNamesChunkLen(dir_name_index.length.get()));
57        }
58        let path_data_length = dir_name_index
59            .length
60            .get()
61            .try_into()
62            .map_err(|_| Error::InvalidDirectoryNamesChunkLen(dir_name_index.length.get()))?;
63        let mut path_data = vec![0; path_data_length];
64        source
65            .read_at_exact(dir_name_index.offset.get(), &mut path_data)
66            .await
67            .map_err(Error::Read)?;
68        let path_data = path_data.into_boxed_slice();
69
70        let () = crate::validate_directory_entries_and_paths(
71            &directory_entries,
72            &path_data,
73            stream_len,
74            end_of_last_non_content_chunk,
75        )?;
76
77        Ok(Self { source, directory_entries, path_data })
78    }
79
80    async fn read_index_header(source: &mut T) -> Result<Index, Error> {
81        let mut index = Index::default();
82        source.read_at_exact(0, index.as_mut_bytes()).await.map_err(Error::Read)?;
83        if index.magic != MAGIC_INDEX_VALUE {
84            Err(Error::InvalidMagic(index.magic))
85        } else if !index.length.get().is_multiple_of(INDEX_ENTRY_LEN)
86            || INDEX_LEN.checked_add(index.length.get()).is_none()
87        {
88            Err(Error::InvalidIndexEntriesLen(index.length.get()))
89        } else {
90            Ok(index)
91        }
92    }
93
94    // Returns (directory_index, directory_names_index, end_of_last_chunk).
95    async fn read_index_entries(
96        source: &mut T,
97        index: &Index,
98    ) -> Result<(IndexEntry, IndexEntry, u64), Error> {
99        let mut dir_index: Option<IndexEntry> = None;
100        let mut dir_name_index: Option<IndexEntry> = None;
101        let mut previous_entry: Option<IndexEntry> = None;
102        for i in 0..index.length.get() / INDEX_ENTRY_LEN {
103            let mut entry = IndexEntry::default();
104            let entry_offset = INDEX_LEN + INDEX_ENTRY_LEN * i;
105            source.read_at_exact(entry_offset, entry.as_mut_bytes()).await.map_err(Error::Read)?;
106
107            let expected_offset = if let Some(previous_entry) = previous_entry {
108                if previous_entry.chunk_type >= entry.chunk_type {
109                    return Err(Error::IndexEntriesOutOfOrder {
110                        prev: previous_entry.chunk_type,
111                        next: entry.chunk_type,
112                    });
113                }
114                previous_entry.offset.get() + previous_entry.length.get()
115            } else {
116                INDEX_LEN + index.length.get()
117            };
118            if entry.offset.get() != expected_offset {
119                return Err(Error::InvalidChunkOffset {
120                    chunk_type: entry.chunk_type,
121                    expected: expected_offset,
122                    actual: entry.offset.get(),
123                });
124            }
125            if entry.offset.get().checked_add(entry.length.get()).is_none() {
126                return Err(Error::InvalidChunkLength {
127                    chunk_type: entry.chunk_type,
128                    offset: entry.offset.get(),
129                    length: entry.length.get(),
130                });
131            }
132
133            match entry.chunk_type {
134                DIR_CHUNK_TYPE => {
135                    dir_index = Some(entry);
136                }
137                DIR_NAMES_CHUNK_TYPE => {
138                    dir_name_index = Some(entry);
139                }
140                // FAR spec does not forbid unknown chunk types
141                _ => {}
142            }
143            previous_entry = Some(entry);
144        }
145        let end_of_last_chunk = if let Some(previous_entry) = previous_entry {
146            previous_entry.offset.get() + previous_entry.length.get()
147        } else {
148            INDEX_LEN
149        };
150        Ok((
151            dir_index.ok_or(Error::MissingDirectoryChunkIndexEntry)?,
152            dir_name_index.ok_or(Error::MissingDirectoryNamesChunkIndexEntry)?,
153            end_of_last_chunk,
154        ))
155    }
156
157    /// Return a list of the items in the archive
158    pub fn list(&self) -> impl ExactSizeIterator<Item = crate::Entry<'_>> {
159        crate::list(&self.directory_entries, &self.path_data)
160    }
161
162    /// Read the entire contents of the entry with the specified path.
163    /// O(log(# directory entries))
164    pub async fn read_file(&mut self, path: &[u8]) -> Result<Vec<u8>, Error> {
165        let entry = crate::find_directory_entry(&self.directory_entries, &self.path_data, path)?;
166        let mut data = vec![
167            0;
168            usize::try_from(entry.data_length.get()).map_err(|_| {
169                Error::ContentChunkDoesNotFitInMemory {
170                    name: path.into(),
171                    chunk_size: entry.data_length.get(),
172                }
173            })?
174        ];
175        let () = self
176            .source
177            .read_at_exact(entry.data_offset.get(), &mut data)
178            .await
179            .map_err(Error::Read)?;
180        Ok(data)
181    }
182
183    /// Get the size in bytes of the entry with the specified path.
184    /// O(log(# directory entries))
185    pub fn get_size(&mut self, path: &[u8]) -> Result<u64, Error> {
186        Ok(crate::find_directory_entry(&self.directory_entries, &self.path_data, path)?
187            .data_length
188            .get())
189    }
190
191    pub fn into_source(self) -> T {
192        self.source
193    }
194}
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199    use crate::tests::example_archive;
200    use assert_matches::assert_matches;
201    use fuchsia_async as fasync;
202    use fuchsia_fs::file::Adapter;
203    use futures::io::Cursor;
204
205    #[fasync::run_singlethreaded(test)]
206    async fn list() {
207        let example = example_archive();
208        let reader = AsyncReader::new(Adapter::new(Cursor::new(&example))).await.unwrap();
209        itertools::assert_equal(
210            reader.list(),
211            [
212                crate::Entry { path: b"a", offset: 4096, length: 2 },
213                crate::Entry { path: b"b", offset: 8192, length: 2 },
214                crate::Entry { path: b"dir/c", offset: 12288, length: 6 },
215            ],
216        );
217    }
218
219    #[fasync::run_singlethreaded(test)]
220    async fn read_file() {
221        let example = example_archive();
222        let mut reader = AsyncReader::new(Adapter::new(Cursor::new(&example))).await.unwrap();
223        for one_name in ["a", "b", "dir/c"].iter().map(|s| s.as_bytes()) {
224            let content = reader.read_file(one_name).await.unwrap();
225            let content_str = std::str::from_utf8(&content).unwrap();
226            let expected = format!("{}\n", std::str::from_utf8(one_name).unwrap());
227            assert_eq!(content_str, &expected);
228        }
229    }
230
231    #[fasync::run_singlethreaded(test)]
232    async fn get_size() {
233        let example = example_archive();
234        let mut reader = AsyncReader::new(Adapter::new(Cursor::new(&example))).await.unwrap();
235        for one_name in ["a", "b", "dir/c"].iter().map(|s| s.as_bytes()) {
236            let returned_size = reader.get_size(one_name).unwrap();
237            let expected_size = one_name.len() + 1;
238            assert_eq!(returned_size, u64::try_from(expected_size).unwrap());
239        }
240    }
241
242    #[fasync::run_singlethreaded(test)]
243    async fn accessors_error_on_missing_path() {
244        let example = example_archive();
245        let mut reader = AsyncReader::new(Adapter::new(Cursor::new(&example))).await.unwrap();
246        assert_matches!(
247            reader.read_file(b"missing-path").await,
248            Err(Error::PathNotPresent(path)) if path == b"missing-path"
249        );
250        assert_matches!(
251            reader.get_size(b"missing-path"),
252            Err(Error::PathNotPresent(path)) if path == b"missing-path"
253        );
254    }
255}