ext4_read_only/
parser.rs

1/*
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012, 2010 Zheng Liu <lz@freebsd.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31// Copyright 2019 The Fuchsia Authors. All rights reserved.
32// Use of this source code is governed by a BSD-style license that can be
33// found in the LICENSE file.
34
35use crate::readers::Reader;
36use crate::structs::{
37    BlockGroupDesc32, BlockGroupDesc64, DirEntry2, DirEntryHeader, EntryType, Extent, ExtentHeader,
38    ExtentIndex, ExtentTreeNode, INode, InvalidAddressErrorType, ParseToStruct, ParsingError,
39    SuperBlock, XattrEntryHeader, XattrHeader, FIRST_BG_PADDING, MINIMUM_INODE_SIZE, MIN_EXT4_SIZE,
40    ROOT_INODE_NUM,
41};
42use once_cell::sync::OnceCell;
43use std::collections::BTreeMap;
44use std::mem::{size_of, size_of_val};
45use std::path::{Component, Path};
46use std::str;
47use zerocopy::byteorder::little_endian::U32 as LEU32;
48use zerocopy::{IntoBytes, SplitByteSlice};
49
50// Assuming/ensuring that we are on a 64bit system where u64 == usize.
51assert_eq_size!(u64, usize);
52
/// Read-only parser over an ext4 filesystem image.
pub struct Parser {
    /// Source of the raw filesystem image bytes.
    reader: Box<dyn Reader>,
    /// Super block, parsed on first use and reused afterwards.
    super_block: OnceCell<SuperBlock>,
}
57
/// Extended attributes of an inode: attribute name mapped to attribute value, both as raw
/// bytes.
pub type XattrMap = BTreeMap<Vec<u8>, Vec<u8>>;
59
/// Abstracts over block group descriptors of different size.
///
/// The parser picks the variant depending on whether the super block reports a 64-bit
/// filesystem.
enum BlockGroupDescriptor {
    /// Descriptor read as a `BlockGroupDesc32`.
    BGD32(BlockGroupDesc32),
    /// Descriptor read as a `BlockGroupDesc64`, which also carries the high 32 bits of the
    /// inode table block number.
    BGD64(BlockGroupDesc64),
}
65
66impl BlockGroupDescriptor {
67    fn inode_table_block(&self) -> u64 {
68        match self {
69            Self::BGD32(bgd) => u64::from(bgd.ext2bgd_i_tables),
70            Self::BGD64(bgd) => {
71                u64::from(bgd.base.ext2bgd_i_tables) + (u64::from(bgd.ext4bgd_i_tables_hi) << 32)
72            }
73        }
74    }
75}
76
77/// EXT4 Parser
78///
79/// Takes in a `Reader` that is able to read arbitrary chunks of data from the filesystem image.
80///
/// Basic use:
/// ```ignore
/// let parser = Parser::new(Box::new(VecReader::new(vec_of_u8)));
/// let tree = parser.build_fuchsia_tree()?;
/// ```
84impl Parser {
85    pub fn new(reader: Box<dyn Reader>) -> Self {
86        Parser { reader, super_block: OnceCell::new() }
87    }
88
89    /// Returns the Super Block.
90    ///
91    /// If the super block has been parsed and saved before, return that.
92    /// Else, parse the super block and save it and return it.
93    ///
94    /// We never need to re-parse the super block in this read-only
95    /// implementation.
96    fn super_block(&self) -> Result<&SuperBlock, ParsingError> {
97        self.super_block.get_or_try_init(|| SuperBlock::parse(&self.reader))
98    }
99
100    /// Reads block size from the Super Block.
101    pub fn block_size(&self) -> Result<u64, ParsingError> {
102        self.super_block()?.block_size()
103    }
104
105    /// Reads full raw data from a given block number.
106    fn block(&self, block_number: u64) -> Result<Box<[u8]>, ParsingError> {
107        if block_number == 0 {
108            return Err(ParsingError::InvalidAddress(
109                InvalidAddressErrorType::Lower,
110                0,
111                FIRST_BG_PADDING,
112            ));
113        }
114        let block_size = self.block_size()?;
115        let address = block_number
116            .checked_mul(block_size)
117            .ok_or(ParsingError::BlockNumberOutOfBounds(block_number))?;
118
119        let mut data = vec![0u8; block_size.try_into().unwrap()];
120        self.reader.read(address, data.as_mut_slice()).map_err(Into::<ParsingError>::into)?;
121
122        Ok(data.into_boxed_slice())
123    }
124
125    /// Returns the address of the given `inode_number` within `self.reader`.
126    fn inode_addr(&self, inode_number: u32) -> Result<u64, ParsingError> {
127        if inode_number < 1 {
128            // INode number 0 is not allowed per ext4 spec.
129            return Err(ParsingError::InvalidInode(inode_number));
130        }
131        let sb = self.super_block()?;
132        let block_size = self.block_size()?;
133
134        // The first Block Group starts with:
135        // - 1024 byte padding
136        // - 1024 byte Super Block
137        // Then in the next block, there are many blocks worth of Block Group Descriptors.
138        // If the block size is 2048 bytes or larger, then the 1024 byte padding, and the
139        // Super Block both fit in the first block (0), and the Block Group Descriptors start
140        // at block 1.
141        //
142        // A 1024 byte block size means the padding takes block 0 and the Super Block takes
143        // block 1. This means the Block Group Descriptors start in block 2.
144        let bgd_table_offset = if block_size >= MIN_EXT4_SIZE {
145            // Padding and Super Block both fit in the first block, so offset to the next
146            // block.
147            block_size
148        } else {
149            // Block size is less than 2048. The only valid block size smaller than 2048 is 1024.
150            // Padding and Super Block take one block each, so offset to the third block.
151            block_size * 2
152        };
153
154        let bgd_offset = (inode_number - 1) as u64 / sb.e2fs_ipg.get() as u64
155            * sb.block_group_descriptor_size() as u64;
156        let bgd = if sb.is_64bit() {
157            BlockGroupDescriptor::BGD64(BlockGroupDesc64::from_reader_with_offset(
158                &self.reader,
159                bgd_table_offset + bgd_offset,
160            )?)
161        } else {
162            BlockGroupDescriptor::BGD32(BlockGroupDesc32::from_reader_with_offset(
163                &self.reader,
164                bgd_table_offset + bgd_offset,
165            )?)
166        };
167
168        // Offset could really be anywhere, and the Reader will enforce reading within the
169        // filesystem size. Not much can be checked here.
170        let inode_table_offset =
171            (inode_number - 1) as u64 % sb.e2fs_ipg.get() as u64 * sb.e2fs_inode_size.get() as u64;
172        let inode_addr = (bgd.inode_table_block() * block_size) + inode_table_offset;
173        if inode_addr < MIN_EXT4_SIZE {
174            return Err(ParsingError::InvalidAddress(
175                InvalidAddressErrorType::Lower,
176                inode_addr,
177                MIN_EXT4_SIZE,
178            ));
179        }
180        Ok(inode_addr)
181    }
182
183    /// Reads the INode at the given inode number.
184    pub fn inode(&self, inode_number: u32) -> Result<INode, ParsingError> {
185        INode::from_reader_with_offset(&self.reader, self.inode_addr(inode_number)?)
186    }
187
188    /// Helper function to get the root directory INode.
189    pub fn root_inode(&self) -> Result<INode, ParsingError> {
190        self.inode(ROOT_INODE_NUM)
191    }
192
193    /// Reads all raw data from a given extent leaf node.
194    fn extent_data(&self, extent: &Extent, mut allowance: u64) -> Result<Vec<u8>, ParsingError> {
195        let block_number = extent.target_block_num();
196        let block_count = extent.e_len.get() as u64;
197        let block_size = self.block_size()?;
198        let mut read_len;
199
200        let mut data = Vec::with_capacity((block_size * block_count).try_into().unwrap());
201
202        for i in 0..block_count {
203            let block_data = self.block(block_number + i as u64)?;
204            if allowance >= block_size {
205                read_len = block_size;
206            } else {
207                read_len = allowance;
208            }
209            let block_data = &block_data[0..read_len.try_into().unwrap()];
210            data.append(&mut block_data.to_vec());
211            allowance -= read_len;
212        }
213
214        Ok(data)
215    }
216
217    /// Reads the inode size and raw extent data for a regular file.  Fails if the provided inode is
218    /// not a regular file.
219    pub fn read_extents(&self, inode_num: u32) -> Result<(u64, Vec<Extent>), ParsingError> {
220        let inode = self.inode(inode_num)?;
221
222        // Make sure this is a regular file.
223        const IFMT: u16 = 0xf000;
224        const IFREG: u16 = 0x8000;
225        if u16::from(inode.e2di_mode) & IFMT != IFREG {
226            return Err(ParsingError::NotFile);
227        }
228
229        let root_extent_tree_node = inode.extent_tree_node()?;
230        let mut extents = Vec::new();
231
232        self.iterate_extents_in_tree(&root_extent_tree_node, &mut |extent| {
233            extents.push(extent.clone());
234            Ok(())
235        })?;
236
237        Ok((inode.size(), extents))
238    }
239
240    /// Reads extent data from a leaf node.
241    ///
242    /// # Arguments
243    /// * `extent`: Extent from which to read data from.
244    /// * `data`: Vec where data that is read is added.
245    /// * `allowance`: The maximum number of bytes to read from the extent. The
246    ///    given file allowance is updated on each call to track sizing for an
247    ///    entire extent tree.
248    fn read_extent_data(
249        &self,
250        extent: &Extent,
251        data: &mut Vec<u8>,
252        allowance: &mut u64,
253    ) -> Result<(), ParsingError> {
254        let mut extent_data = self.extent_data(&extent, *allowance)?;
255        let extent_len = extent_data.len() as u64;
256        if extent_len > *allowance {
257            return Err(ParsingError::ExtentUnexpectedLength(extent_len, *allowance));
258        }
259        *allowance -= extent_len;
260        data.append(&mut extent_data);
261        Ok(())
262    }
263
264    /// Reads directory entries from an extent leaf node.
265    fn read_dir_entries(
266        &self,
267        extent: &Extent,
268        entries: &mut Vec<DirEntry2>,
269    ) -> Result<(), ParsingError> {
270        let block_size = self.block_size()?;
271        let target_block_offset = extent.target_block_num() * block_size;
272
273        // The `e2d_reclen` of the last entry will be large enough fill the
274        // remaining space of the block.
275        for block_index in 0..extent.e_len.get() {
276            let mut dir_entry_offset = 0u64;
277            while (dir_entry_offset + size_of::<DirEntryHeader>() as u64) < block_size {
278                let offset =
279                    dir_entry_offset + target_block_offset + (block_index as u64 * block_size);
280
281                let de_header = DirEntryHeader::from_reader_with_offset(&self.reader, offset)?;
282                let mut de = DirEntry2 {
283                    e2d_ino: de_header.e2d_ino,
284                    e2d_reclen: de_header.e2d_reclen,
285                    e2d_namlen: de_header.e2d_namlen,
286                    e2d_type: de_header.e2d_type,
287                    e2d_name: [0u8; 255],
288                };
289                self.reader.read(
290                    offset + size_of::<DirEntryHeader>() as u64,
291                    &mut de.e2d_name[..de.e2d_namlen as usize],
292                )?;
293
294                dir_entry_offset += de.e2d_reclen.get() as u64;
295
296                if de.e2d_ino.get() != 0 {
297                    entries.push(de);
298                }
299            }
300        }
301        Ok(())
302    }
303
304    /// Handles an extent tree leaf node by invoking `extent_handler` for each contained extent.
305    fn iterate_extents_in_leaf<B: SplitByteSlice, F: FnMut(&Extent) -> Result<(), ParsingError>>(
306        &self,
307        extent_tree_node: &ExtentTreeNode<B>,
308        extent_handler: &mut F,
309    ) -> Result<(), ParsingError> {
310        for e_index in 0..extent_tree_node.header.eh_ecount.get() {
311            let start = size_of::<Extent>() * e_index as usize;
312            let end = start + size_of::<Extent>() as usize;
313            let e = Extent::to_struct_ref(
314                &(extent_tree_node.entries)[start..end],
315                ParsingError::InvalidExtent(start as u64),
316            )?;
317
318            extent_handler(e)?;
319        }
320
321        Ok(())
322    }
323
324    /// Handles traversal down an extent tree.
325    fn iterate_extents_in_tree<B: SplitByteSlice, F: FnMut(&Extent) -> Result<(), ParsingError>>(
326        &self,
327        extent_tree_node: &ExtentTreeNode<B>,
328        extent_handler: &mut F,
329    ) -> Result<(), ParsingError> {
330        let block_size = self.block_size()?;
331
332        match extent_tree_node.header.eh_depth.get() {
333            0 => {
334                self.iterate_extents_in_leaf(extent_tree_node, extent_handler)?;
335            }
336            1..=4 => {
337                for e_index in 0..extent_tree_node.header.eh_ecount.get() {
338                    let start: usize = size_of::<Extent>() * e_index as usize;
339                    let end = start + size_of::<Extent>();
340                    let e = ExtentIndex::to_struct_ref(
341                        &(extent_tree_node.entries)[start..end],
342                        ParsingError::InvalidExtent(start as u64),
343                    )?;
344
345                    let next_level_offset = e.target_block_num() as u64 * block_size;
346
347                    let next_extent_header =
348                        ExtentHeader::from_reader_with_offset(&self.reader, next_level_offset)?;
349
350                    let entry_count = next_extent_header.eh_ecount.get() as usize;
351                    let entry_size = match next_extent_header.eh_depth.get() {
352                        0 => size_of::<Extent>(),
353                        _ => size_of::<ExtentIndex>(),
354                    };
355                    let node_size = size_of::<ExtentHeader>() + (entry_count * entry_size);
356
357                    let mut data = vec![0u8; node_size];
358                    self.reader.read(next_level_offset, data.as_mut_slice())?;
359
360                    let next_level_node = ExtentTreeNode::parse(data.as_slice())
361                        .ok_or(ParsingError::InvalidExtent(next_level_offset))?;
362
363                    self.iterate_extents_in_tree(&next_level_node, extent_handler)?;
364                }
365            }
366            _ => return Err(ParsingError::InvalidExtentHeader),
367        };
368
369        Ok(())
370    }
371
372    /// Lists directory entries from the directory that is the given Inode.
373    ///
374    /// Errors if the Inode does not map to a Directory.
375    pub fn entries_from_inode(&self, inode: &INode) -> Result<Vec<DirEntry2>, ParsingError> {
376        let root_extent_tree_node = inode.extent_tree_node()?;
377        let mut dir_entries = Vec::new();
378
379        self.iterate_extents_in_tree(&root_extent_tree_node, &mut |extent| {
380            self.read_dir_entries(extent, &mut dir_entries)
381        })?;
382
383        Ok(dir_entries)
384    }
385
386    /// Gets any DirEntry2 that isn't root.
387    ///
388    /// Root doesn't have a DirEntry2.
389    ///
390    /// When dynamic loading of files is supported, this is the required mechanism.
391    pub fn entry_at_path(&self, path: &Path) -> Result<DirEntry2, ParsingError> {
392        let root_inode = self.root_inode()?;
393        let root_entries = self.entries_from_inode(&root_inode)?;
394        let mut entry_map = DirEntry2::as_hash_map(root_entries)?;
395
396        let mut components = path.components().peekable();
397        let mut component = components.next();
398
399        while component != None {
400            match component {
401                Some(Component::RootDir) => {
402                    // Skip
403                }
404                Some(Component::Normal(name)) => {
405                    let name = name.to_str().ok_or(ParsingError::InvalidInputPath)?;
406                    if let Some(entry) = entry_map.remove(name) {
407                        if components.peek() == None {
408                            return Ok(entry);
409                        }
410                        match EntryType::from_u8(entry.e2d_type)? {
411                            EntryType::Directory => {
412                                let inode = self.inode(entry.e2d_ino.get())?;
413                                entry_map =
414                                    DirEntry2::as_hash_map(self.entries_from_inode(&inode)?)?;
415                            }
416                            _ => {
417                                break;
418                            }
419                        }
420                    }
421                }
422                _ => {
423                    break;
424                }
425            }
426            component = components.next();
427        }
428
429        match path.to_str() {
430            Some(s) => Err(ParsingError::PathNotFound(s.to_string())),
431            None => Err(ParsingError::PathNotFound(
432                "Bad path - was not able to convert into string".to_string(),
433            )),
434        }
435    }
436
437    /// Reads all raw data for a given inode.
438    ///
439    /// For a file, this will be the file data. For a symlink,
440    /// this will be the symlink target.
441    pub fn read_data(&self, inode_num: u32) -> Result<Vec<u8>, ParsingError> {
442        let inode = self.inode(inode_num)?;
443        let mut size_remaining = inode.size();
444        let mut data = Vec::with_capacity(size_remaining.try_into().unwrap());
445
446        // Check for symlink with inline data.
447        if u16::from(inode.e2di_mode) & 0xa000 != 0 && u32::from(inode.e2di_nblock) == 0 {
448            data.extend_from_slice(&inode.e2di_blocks[..inode.size().try_into().unwrap()]);
449            return Ok(data);
450        }
451
452        let root_extent_tree_node = inode.extent_tree_node()?;
453        let mut extents = Vec::new();
454
455        self.iterate_extents_in_tree(&root_extent_tree_node, &mut |extent| {
456            extents.push(extent.clone());
457            Ok(())
458        })?;
459
460        let block_size = self.block_size()?;
461
462        // Summarized from https://www.kernel.org/doc/ols/2007/ols2007v2-pages-21-34.pdf,
463        // Section 2.2: Extent and ExtentHeader entries must be sorted by logical block number. This
464        // enforces that when the extent tree is traversed depth first that a list of extents sorted
465        // by logical block number is produced. This is a requirement to produce the proper ordering
466        // of bytes within `data` here.
467        for extent in extents {
468            let buffer_offset = extent.e_blk.get() as u64 * block_size;
469
470            // File may be sparse. Sparse files will have gaps
471            // between logical blocks. Fill in any gaps with zeros.
472            if buffer_offset > data.len() as u64 {
473                size_remaining -= buffer_offset - data.len() as u64;
474                data.resize(buffer_offset.try_into().unwrap(), 0);
475            }
476
477            self.read_extent_data(&extent, &mut data, &mut size_remaining)?;
478        }
479
480        // If there are zero pages at the end of the file, they won't appear in the extents list.
481        // Pad the data with zeroes to the full file length.
482        // TODO(https://fxbug.dev/42073237): Add a test for this behavior, once better test infra exists.
483        data.resize(inode.size().try_into().unwrap(), 0);
484        Ok(data)
485    }
486
487    /// Progress through the entire directory tree starting from the given INode.
488    ///
489    /// If given the root directory INode, this will process through every directory entry in the
490    /// filesystem in a DFS manner.
491    ///
492    /// Takes in a closure that will be called for each entry found.
493    /// Closure should return `Ok(true)` in order to continue the process, otherwise the process
494    /// will stop.
495    ///
496    /// Returns Ok(true) if it has indexed its subtree successfully. Otherwise, if the receiver
497    /// chooses to cancel indexing early, an Ok(false) is returned and propagated up.
498    pub fn index<R>(
499        &self,
500        inode: INode,
501        prefix: Vec<&str>,
502        receiver: &mut R,
503    ) -> Result<bool, ParsingError>
504    where
505        R: FnMut(&Parser, Vec<&str>, &DirEntry2) -> Result<bool, ParsingError>,
506    {
507        let entries = self.entries_from_inode(&inode)?;
508        for entry in entries {
509            let entry_name = entry.name()?;
510            if entry_name == "." || entry_name == ".." {
511                continue;
512            }
513            let mut name = Vec::new();
514            name.append(&mut prefix.clone());
515            name.push(entry_name);
516            if !receiver(self, name.clone(), &entry)? {
517                return Ok(false);
518            }
519            if EntryType::from_u8(entry.e2d_type)? == EntryType::Directory {
520                let inode = self.inode(entry.e2d_ino.get())?;
521                if !self.index(inode, name, receiver)? {
522                    return Ok(false);
523                }
524            }
525        }
526
527        Ok(true)
528    }
529
530    /// Returns the xattrs associated with `inode_number`.
531    pub fn inode_xattrs(&self, inode_number: u32) -> Result<XattrMap, ParsingError> {
532        let mut xattrs = BTreeMap::new();
533
534        let inode_addr = self.inode_addr(inode_number).expect("Couldn't get inode address");
535        let inode =
536            INode::from_reader_with_offset(&self.reader, inode_addr).expect("Failed reader");
537
538        let sb = self.super_block().expect("No super block for inode");
539        let xattr_magic_addr = inode_addr
540            + MINIMUM_INODE_SIZE
541            + u64::from(inode.e4di_extra_isize(sb).unwrap_or_default());
542
543        let mut magic = LEU32::ZERO;
544        self.reader.read(xattr_magic_addr, magic.as_mut_bytes()).expect("Failed to read xattr");
545        if magic.get() == Self::XATTR_MAGIC {
546            let first_entry = xattr_magic_addr + size_of_val(&magic) as u64;
547            self.read_xattr_entries_from_inode(
548                first_entry,
549                inode_addr + (sb.e2fs_inode_size.get() as u64),
550                &mut xattrs,
551            )?;
552        }
553
554        let block_number: u64 = inode.facl();
555        if block_number > 0 {
556            let block = self.block(block_number).expect("Couldn't find block");
557            Self::read_xattr_entries_from_block(&block, &mut xattrs)?;
558        }
559
560        Ok(xattrs)
561    }
562
    /// Byte alignment to which the start of each xattr entry is rounded up.
    const XATTR_ALIGNMENT: u64 = 4;
    /// Magic number expected at the start of an xattr region; a region without it is not
    /// parsed.
    const XATTR_MAGIC: u32 = 0xea020000;
565
566    fn round_up_to_align(x: u64, align: u64) -> u64 {
567        let spare = x % align;
568        if spare > 0 {
569            x.checked_add(align - spare).expect("Overflow when aligning")
570        } else {
571            x
572        }
573    }
574
575    fn is_valid_xattr_entry_header(header: &XattrEntryHeader) -> bool {
576        !(header.e_name_len == 0
577            && header.e_name_index == 0
578            && header.e_value_offs.get() == 0
579            && header.e_value_inum.get() == 0)
580    }
581
582    fn xattr_prefix_for_name_index(header: &XattrEntryHeader) -> Vec<u8> {
583        match header.e_name_index {
584            1 => b"user.".to_vec(),
585            2 => b"system.posix_acl_access.".to_vec(),
586            3 => b"system.posix_acl_default.".to_vec(),
587            4 => b"trusted.".to_vec(),
588            6 => b"security.".to_vec(),
589            7 => b"system.".to_vec(),
590            8 => b"system.richacl".to_vec(),
591            _ => b"".to_vec(),
592        }
593    }
594
595    /// Reads all the xattr entries, stored in the inode, from `entries_addr` into `xattrs`.
596    fn read_xattr_entries_from_inode(
597        &self,
598        mut entries_addr: u64,
599        inode_end: u64,
600        xattrs: &mut XattrMap,
601    ) -> Result<(), ParsingError> {
602        let value_base_addr = entries_addr;
603        while entries_addr + (std::mem::size_of::<XattrEntryHeader>() as u64) < inode_end {
604            let head = XattrEntryHeader::from_reader_with_offset(&self.reader, entries_addr)?;
605            if !Self::is_valid_xattr_entry_header(&head) {
606                break;
607            }
608
609            let prefix = Self::xattr_prefix_for_name_index(&head);
610            let mut name = Vec::with_capacity(prefix.len() + head.e_name_len as usize);
611            name.extend_from_slice(&prefix);
612            name.resize(prefix.len() + head.e_name_len as usize, 0);
613
614            self.reader.read(
615                entries_addr + size_of::<XattrEntryHeader>() as u64,
616                &mut name[prefix.len()..],
617            )?;
618
619            let mut value = vec![0u8; head.e_value_size.get() as usize];
620            self.reader.read(value_base_addr + u64::from(head.e_value_offs), &mut value)?;
621            xattrs.insert(name, value);
622
623            entries_addr += size_of::<XattrEntryHeader>() as u64 + head.e_name_len as u64;
624            entries_addr = Self::round_up_to_align(entries_addr, Self::XATTR_ALIGNMENT);
625        }
626        Ok(())
627    }
628
629    /// Reads all the xattr entries, stored in the inode, from `entries_addr` into `xattrs`.
630    fn read_xattr_entries_from_block(
631        block: &[u8],
632        xattrs: &mut XattrMap,
633    ) -> Result<(), ParsingError> {
634        let head = XattrHeader::to_struct_ref(
635            &block[..std::mem::size_of::<XattrHeader>()],
636            ParsingError::Incompatible("Invalid XattrHeader".to_string()),
637        )?;
638
639        if head.e_magic.get() != Self::XATTR_MAGIC {
640            return Ok(());
641        }
642
643        let mut offset = Self::round_up_to_align(
644            std::mem::size_of::<XattrHeader>() as u64,
645            Self::XATTR_ALIGNMENT * 2,
646        ) as usize;
647
648        while offset + std::mem::size_of::<XattrEntryHeader>() < block.len() {
649            let head = XattrEntryHeader::to_struct_ref(
650                &block[offset..offset + std::mem::size_of::<XattrEntryHeader>()],
651                ParsingError::Incompatible("Invalid XattrEntryHeader".to_string()),
652            )?;
653
654            if !Self::is_valid_xattr_entry_header(&head) {
655                break;
656            }
657
658            let name_start = offset + std::mem::size_of::<XattrEntryHeader>();
659            let name_end = name_start + head.e_name_len as usize;
660            let mut name = Self::xattr_prefix_for_name_index(&head);
661            name.extend_from_slice(&block[name_start..name_end]);
662
663            let value_start = head.e_value_offs.get() as usize;
664            let value_end = value_start + head.e_value_size.get() as usize;
665            let value = block[value_start..value_end].to_vec();
666            xattrs.insert(name, value);
667
668            offset = Self::round_up_to_align(name_end as u64, 4) as usize;
669        }
670
671        Ok(())
672    }
673
674    /// Returns a `Simple` filesystem as built by `TreeBuilder.build()`.
675    #[cfg(target_os = "fuchsia")]
676    pub fn build_fuchsia_tree(
677        &self,
678    ) -> Result<std::sync::Arc<vfs::directory::immutable::Simple>, ParsingError> {
679        use vfs::file::vmo::read_only;
680        use vfs::tree_builder::TreeBuilder;
681
682        let root_inode = self.root_inode()?;
683        let mut tree = TreeBuilder::empty_dir();
684
685        self.index(root_inode, Vec::new(), &mut |my_self, path, entry| {
686            let entry_type = EntryType::from_u8(entry.e2d_type)?;
687            match entry_type {
688                EntryType::RegularFile => {
689                    let data = my_self.read_data(entry.e2d_ino.into())?;
690                    tree.add_entry(path.clone(), read_only(data))
691                        .map_err(|_| ParsingError::BadFile(path.join("/")))?;
692                }
693                EntryType::Directory => {
694                    tree.add_empty_dir(path.clone())
695                        .map_err(|_| ParsingError::BadDirectory(path.join("/")))?;
696                }
697                _ => {
698                    // TODO(https://fxbug.dev/42073143): Handle other types.
699                }
700            }
701            Ok(true)
702        })?;
703
704        Ok(tree.build())
705    }
706}
707
708#[cfg(test)]
709mod tests {
710    use crate::parser::Parser;
711    use crate::readers::VecReader;
712    use crate::structs::EntryType;
713    use maplit::hashmap;
714    use sha2::{Digest, Sha256};
715    use std::collections::{HashMap, HashSet};
716    use std::path::Path;
717    use std::{fs, str};
718    use test_case::test_case;
719
720    #[fuchsia::test]
721    fn list_root_1_file() {
722        let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
723        let parser = Parser::new(Box::new(VecReader::new(data)));
724        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
725        let root_inode = parser.root_inode().expect("Parse INode");
726        let entries = parser.entries_from_inode(&root_inode).expect("List entries");
727        let mut expected_entries = vec!["file1", "lost+found", "..", "."];
728
729        for de in &entries {
730            assert_eq!(expected_entries.pop().unwrap(), de.name().unwrap());
731        }
732        assert_eq!(expected_entries.len(), 0);
733    }
734
735    #[test_case(
736        "/pkg/data/nest.img",
737        vec!["inner", "file1", "lost+found", "..", "."];
738        "fs with a single directory")]
739    #[test_case(
740        "/pkg/data/extents.img",
741        vec!["trailingzeropages", "a", "smallfile", "largefile", "sparsefile", "lost+found", "..", "."];
742        "fs with multiple files with multiple extents")]
743    fn list_root(ext4_path: &str, mut expected_entries: Vec<&str>) {
744        let data = fs::read(ext4_path).expect("Unable to read file");
745        let parser = Parser::new(Box::new(VecReader::new(data)));
746        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
747        let root_inode = parser.root_inode().expect("Parse INode");
748        let entries = parser.entries_from_inode(&root_inode).expect("List entries");
749
750        for de in &entries {
751            assert_eq!(expected_entries.pop().unwrap(), de.name().unwrap());
752        }
753        assert_eq!(expected_entries.len(), 0);
754    }
755
756    #[fuchsia::test]
757    fn get_from_path() {
758        let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
759        let parser = Parser::new(Box::new(VecReader::new(data)));
760        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
761
762        let entry = parser.entry_at_path(Path::new("/inner")).expect("Entry at path");
763        assert_eq!(entry.e2d_ino.get(), 12);
764        assert_eq!(entry.name().unwrap(), "inner");
765
766        let entry = parser.entry_at_path(Path::new("/inner/file2")).expect("Entry at path");
767        assert_eq!(entry.e2d_ino.get(), 17);
768        assert_eq!(entry.name().unwrap(), "file2");
769    }
770
771    #[fuchsia::test]
772    fn read_data() {
773        let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
774        let parser = Parser::new(Box::new(VecReader::new(data)));
775        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
776
777        let entry = parser.entry_at_path(Path::new("file1")).expect("Entry at path");
778        assert_eq!(entry.e2d_ino.get(), 15);
779        assert_eq!(entry.name().unwrap(), "file1");
780
781        let data = parser.read_data(entry.e2d_ino.into()).expect("File data");
782        let compare = "file1 contents.\n";
783        assert_eq!(data.len(), compare.len());
784        assert_eq!(str::from_utf8(data.as_slice()).expect("File data"), compare);
785    }
786
787    #[fuchsia::test]
788    fn fail_inode_zero() {
789        let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
790        let parser = Parser::new(Box::new(VecReader::new(data)));
791        assert!(parser.inode(0).is_err());
792    }
793
794    #[fuchsia::test]
795    fn index() {
796        let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
797        let parser = Parser::new(Box::new(VecReader::new(data)));
798        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
799
800        let mut count = 0;
801        let mut entries: HashSet<u32> = HashSet::new();
802        let root_inode = parser.root_inode().expect("Root inode");
803
804        parser
805            .index(root_inode, Vec::new(), &mut |_, _, entry| {
806                count += 1;
807
808                // Make sure each inode only appears once.
809                assert_ne!(entries.contains(&entry.e2d_ino.get()), true);
810                entries.insert(entry.e2d_ino.get());
811
812                Ok(true)
813            })
814            .expect("Index");
815
816        assert_eq!(count, 4);
817    }
818
819    #[fuchsia::test]
820    fn xattr() {
821        let data = fs::read("/pkg/data/xattr.img").expect("Unable to read file");
822        let parser = Parser::new(Box::new(VecReader::new(data)));
823        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
824        let root_inode = parser.root_inode().expect("Root inode");
825        let mut found_files = HashSet::new();
826
827        parser
828            .index(root_inode, Vec::new(), &mut |_, _, entry| {
829                let name = entry.e2d_name;
830                let inode = entry.e2d_ino.get();
831                let attributes = parser.inode_xattrs(inode).expect("Extended attributes");
832                match name {
833                    name if &name[0..10] == b"lost+found" => {
834                        assert_eq!(attributes.len(), 0);
835                        found_files.insert("lost+found");
836                    }
837                    name if &name[0..5] == b"file1" => {
838                        assert_eq!(attributes.len(), 1);
839                        assert_eq!(attributes[&b"user.test".to_vec()], b"test value".to_vec());
840                        found_files.insert("file1");
841                    }
842                    name if &name[0..9] == b"file_many" => {
843                        assert_eq!(attributes.len(), 6);
844                        assert_eq!(
845                            attributes[&b"user.long".to_vec()],
846                            b"vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv".to_vec()
847                        );
848                        found_files.insert("file_many");
849                    }
850                    name if &name[0..6] == b"subdir" => {
851                        assert_eq!(attributes.len(), 1);
852                        assert_eq!(attributes[&b"user.type".to_vec()], b"dir".to_vec());
853                        found_files.insert("subdir");
854                    }
855                    name if &name[0..5] == b"file2" => {
856                        assert_eq!(attributes.len(), 2);
857                        assert_eq!(
858                            attributes[&b"user.test_one".to_vec()],
859                            b"test value 1".to_vec()
860                        );
861                        assert_eq!(
862                            attributes[&b"user.test_two".to_vec()],
863                            b"test value 2".to_vec()
864                        );
865                        found_files.insert("file2");
866                    }
867                    _ => {}
868                }
869                Ok(true)
870            })
871            .expect("Index");
872
873        assert_eq!(found_files.len(), 5);
874    }
875
876    #[test_case(
877        "/pkg/data/extents.img",
878        hashmap!{
879            "largefile".to_string() => "de2cf635ae4e0e727f1e412f978001d6a70d2386dc798d4327ec8c77a8e4895d".to_string(),
880            "smallfile".to_string() => "5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03".to_string(),
881            "sparsefile".to_string() => "3f411e42c1417cd8845d7144679812be3e120318d843c8c6e66d8b2c47a700e9".to_string(),
882            "trailingzeropages".to_string() => "afc5cc689fd3cb8d00c147d60dc911a70d36b7afb03cc7f15de9c78a52be978d".to_string(),
883            "a/multi/dir/path/within/this/crowded/extents/test/img/empty".to_string() => "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string(),
884        },
885        vec!["a/multi/dir/path/within/this/crowded/extents/test/img", "lost+found"];
886        "fs with multiple files with multiple extents")]
887    #[test_case(
888        "/pkg/data/1file.img",
889        hashmap!{
890            "file1".to_string() => "6bc35bfb2ca96c75a1fecde205693c19a827d4b04e90ace330048f3e031487dd".to_string(),
891        },
892        vec!["lost+found"];
893        "fs with one small file")]
894    #[test_case(
895        "/pkg/data/nest.img",
896        hashmap!{
897            "file1".to_string() => "6bc35bfb2ca96c75a1fecde205693c19a827d4b04e90ace330048f3e031487dd".to_string(),
898            "inner/file2".to_string() => "215ca145cbac95c9e2a6f5ff91ca1887c837b18e5f58fd2a7a16e2e5a3901e10".to_string(),
899        },
900        vec!["inner", "lost+found"];
901        "fs with a single directory")]
902    #[test_case(
903        "/pkg/data/nest64.img",
904        hashmap!{
905            "file1".to_string() => "6bc35bfb2ca96c75a1fecde205693c19a827d4b04e90ace330048f3e031487dd".to_string(),
906            "inner/file2".to_string() => "215ca145cbac95c9e2a6f5ff91ca1887c837b18e5f58fd2a7a16e2e5a3901e10".to_string(),
907        },
908        vec!["inner", "lost+found"];
909        "fs with 64bit enabled and a single directory")]
910    #[test_case(
911        "/pkg/data/longdir.img",
912        {
913            let mut hash = HashMap::new();
914            for i in 1..=1000 {
915                hash.insert(i.to_string(), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string());
916            }
917            hash
918        },
919        vec!["lost+found"];
920        "fs with many entries in a directory")]
921    fn check_data(
922        ext4_path: &str,
923        mut file_hashes: HashMap<String, String>,
924        expected_dirs: Vec<&str>,
925    ) {
926        let data = fs::read(ext4_path).expect("Unable to read file");
927        let parser = Parser::new(Box::new(VecReader::new(data)));
928        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
929
930        let root_inode = parser.root_inode().expect("Root inode");
931
932        parser
933            .index(root_inode, Vec::new(), &mut |my_self, path, entry| {
934                let entry_type = EntryType::from_u8(entry.e2d_type).expect("Entry Type");
935                let file_path = path.join("/");
936
937                match entry_type {
938                    EntryType::RegularFile => {
939                        let data = my_self.read_data(entry.e2d_ino.into()).expect("File data");
940
941                        let mut hasher = Sha256::new();
942                        hasher.update(&data);
943                        assert_eq!(
944                            file_hashes.remove(&file_path).unwrap(),
945                            hex::encode(hasher.finalize())
946                        );
947                    }
948                    EntryType::Directory => {
949                        let mut found = false;
950
951                        // These should be the only possible directories.
952                        for expected_dir in expected_dirs.iter() {
953                            if expected_dir.starts_with(&file_path) {
954                                found = true;
955                                break;
956                            }
957                        }
958                        assert!(found, "Unexpected path {}", file_path);
959                    }
960                    _ => {
961                        assert!(false, "No other types should exist in this image.");
962                    }
963                }
964                Ok(true)
965            })
966            .expect("Index");
967        assert!(file_hashes.is_empty(), "Expected files were not found {:?}", file_hashes);
968    }
969}