1use crate::{
6 DirectoryEntry, Error, Index, IndexEntry, DIRECTORY_ENTRY_LEN, DIR_CHUNK_TYPE,
7 DIR_NAMES_CHUNK_TYPE, INDEX_ENTRY_LEN, INDEX_LEN, MAGIC_INDEX_VALUE,
8};
9use std::convert::TryInto as _;
10use std::io::{Read, Seek, SeekFrom};
11use zerocopy::IntoBytes as _;
12
13#[derive(Debug)]
15pub struct Reader<T>
16where
17 T: Read + Seek,
18{
19 source: T,
20 directory_entries: Box<[DirectoryEntry]>,
21 path_data: Box<[u8]>,
22}
23
24impl<T> Reader<T>
25where
26 T: Read + Seek,
27{
28 pub fn new(mut source: T) -> Result<Self, Error> {
30 let index = Self::read_index_header(&mut source)?;
31 let (dir_index, dir_name_index, end_of_last_non_content_chunk) =
32 Self::read_index_entries(&mut source, &index)?;
33 let stream_len = source.seek(SeekFrom::End(0)).map_err(Error::Seek)?;
34
35 if !dir_index.length.get().is_multiple_of(DIRECTORY_ENTRY_LEN) {
37 return Err(Error::InvalidDirectoryChunkLen(dir_index.length.get()));
38 }
39 let mut directory_entries =
40 vec![
41 DirectoryEntry::default();
42 (dir_index.length.get() / DIRECTORY_ENTRY_LEN)
43 .try_into()
44 .map_err(|_| { Error::InvalidDirectoryChunkLen(dir_index.length.get()) })?
45 ];
46 source.seek(SeekFrom::Start(dir_index.offset.get())).map_err(Error::Seek)?;
47 source.read_exact(directory_entries.as_mut_bytes()).map_err(Error::Read)?;
48 let directory_entries = directory_entries.into_boxed_slice();
49
50 if !dir_name_index.length.get().is_multiple_of(8)
52 || dir_name_index.length.get() > stream_len
53 {
54 return Err(Error::InvalidDirectoryNamesChunkLen(dir_name_index.length.get()));
55 }
56 let path_data_length = dir_name_index
57 .length
58 .get()
59 .try_into()
60 .map_err(|_| Error::InvalidDirectoryNamesChunkLen(dir_name_index.length.get()))?;
61 let mut path_data = vec![0; path_data_length];
62 source.seek(SeekFrom::Start(dir_name_index.offset.get())).map_err(Error::Seek)?;
63 source.read_exact(path_data.as_mut_slice()).map_err(Error::Read)?;
64 let path_data = path_data.into_boxed_slice();
65
66 let () = crate::validate_directory_entries_and_paths(
67 &directory_entries,
68 &path_data,
69 stream_len,
70 end_of_last_non_content_chunk,
71 )?;
72
73 Ok(Self { source, directory_entries, path_data })
74 }
75
76 fn read_index_header(source: &mut T) -> Result<Index, Error> {
78 let mut index = Index::default();
79 source.read_exact(index.as_mut_bytes()).map_err(Error::Read)?;
80 if index.magic != MAGIC_INDEX_VALUE {
81 Err(Error::InvalidMagic(index.magic))
82 } else if !index.length.get().is_multiple_of(INDEX_ENTRY_LEN)
83 || INDEX_LEN.checked_add(index.length.get()).is_none()
84 {
85 Err(Error::InvalidIndexEntriesLen(index.length.get()))
86 } else {
87 Ok(index)
88 }
89 }
90
91 fn read_index_entries(
94 source: &mut T,
95 index: &Index,
96 ) -> Result<(IndexEntry, IndexEntry, u64), Error> {
97 let mut dir_index: Option<IndexEntry> = None;
98 let mut dir_name_index: Option<IndexEntry> = None;
99 let mut previous_entry: Option<IndexEntry> = None;
100 for _ in 0..index.length.get() / INDEX_ENTRY_LEN {
101 let mut entry = IndexEntry::default();
102 source.read_exact(entry.as_mut_bytes()).map_err(Error::Read)?;
103
104 let expected_offset = if let Some(previous_entry) = previous_entry {
105 if previous_entry.chunk_type >= entry.chunk_type {
106 return Err(Error::IndexEntriesOutOfOrder {
107 prev: previous_entry.chunk_type,
108 next: entry.chunk_type,
109 });
110 }
111 previous_entry.offset.get() + previous_entry.length.get()
112 } else {
113 INDEX_LEN + index.length.get()
114 };
115 if entry.offset.get() != expected_offset {
116 return Err(Error::InvalidChunkOffset {
117 chunk_type: entry.chunk_type,
118 expected: expected_offset,
119 actual: entry.offset.get(),
120 });
121 }
122 if entry.offset.get().checked_add(entry.length.get()).is_none() {
123 return Err(Error::InvalidChunkLength {
124 chunk_type: entry.chunk_type,
125 offset: entry.offset.get(),
126 length: entry.length.get(),
127 });
128 }
129
130 match entry.chunk_type {
131 DIR_CHUNK_TYPE => {
132 dir_index = Some(entry);
133 }
134 DIR_NAMES_CHUNK_TYPE => {
135 dir_name_index = Some(entry);
136 }
137 _ => {}
139 }
140 previous_entry = Some(entry);
141 }
142 let end_of_last_chunk = if let Some(previous_entry) = previous_entry {
143 previous_entry.offset.get() + previous_entry.length.get()
144 } else {
145 INDEX_LEN
146 };
147 Ok((
148 dir_index.ok_or(Error::MissingDirectoryChunkIndexEntry)?,
149 dir_name_index.ok_or(Error::MissingDirectoryNamesChunkIndexEntry)?,
150 end_of_last_chunk,
151 ))
152 }
153
154 pub fn list(&self) -> impl ExactSizeIterator<Item = crate::Entry<'_>> {
156 crate::list(&self.directory_entries, &self.path_data)
157 }
158
159 pub fn read_file(&mut self, path: &[u8]) -> Result<Vec<u8>, Error> {
162 let entry = crate::find_directory_entry(&self.directory_entries, &self.path_data, path)?;
163 let mut data = vec![
164 0;
165 usize::try_from(entry.data_length.get()).map_err(|_| {
166 Error::ContentChunkDoesNotFitInMemory {
167 name: path.into(),
168 chunk_size: entry.data_length.get(),
169 }
170 })?
171 ];
172 let _: u64 =
173 self.source.seek(SeekFrom::Start(entry.data_offset.get())).map_err(Error::Seek)?;
174 let () = self.source.read_exact(&mut data).map_err(Error::Read)?;
175 Ok(data)
176 }
177
178 pub fn get_size(&mut self, path: &[u8]) -> Result<u64, Error> {
181 Ok(crate::find_directory_entry(&self.directory_entries, &self.path_data, path)?
182 .data_length
183 .get())
184 }
185}
186
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::example_archive;
    use assert_matches::assert_matches;
    use std::io::Cursor;

    /// Listing the example archive yields its three entries with the expected
    /// paths, offsets and lengths.
    #[test]
    fn list() {
        let archive = example_archive();
        let reader = Reader::new(Cursor::new(&archive)).unwrap();
        let expected_entries = [
            crate::Entry { path: b"a", offset: 4096, length: 2 },
            crate::Entry { path: b"b", offset: 8192, length: 2 },
            crate::Entry { path: b"dir/c", offset: 12288, length: 6 },
        ];
        itertools::assert_equal(reader.list(), expected_entries);
    }

    /// Each file in the example archive contains its own path followed by a newline.
    #[test]
    fn read_file() {
        let archive = example_archive();
        let mut reader = Reader::new(Cursor::new(&archive)).unwrap();
        for name in ["a", "b", "dir/c"] {
            let bytes = reader.read_file(name.as_bytes()).unwrap();
            let text = String::from_utf8(bytes).unwrap();
            assert_eq!(text, format!("{}\n", name));
        }
    }

    /// The reported size of each file is the length of its path plus one byte.
    #[test]
    fn get_size() {
        let archive = example_archive();
        let mut reader = Reader::new(Cursor::new(&archive)).unwrap();
        for name in ["a", "b", "dir/c"] {
            let path = name.as_bytes();
            let expected_size = u64::try_from(path.len() + 1).unwrap();
            let actual_size = reader.get_size(path).unwrap();
            assert_eq!(actual_size, expected_size);
        }
    }

    /// Both accessors report `PathNotPresent`, carrying the requested path, when
    /// the path is not in the archive.
    #[test]
    fn accessors_error_on_missing_path() {
        let archive = example_archive();
        let mut reader = Reader::new(Cursor::new(&archive)).unwrap();

        let read_result = reader.read_file(b"missing-path");
        assert_matches!(
            read_result,
            Err(Error::PathNotPresent(path)) if path == b"missing-path"
        );

        let size_result = reader.get_size(b"missing-path");
        assert_matches!(
            size_result,
            Err(Error::PathNotPresent(path)) if path == b"missing-path"
        );
    }
}