// ext4_read_only/parser.rs

1/*
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012, 2010 Zheng Liu <lz@freebsd.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31// Copyright 2019 The Fuchsia Authors. All rights reserved.
32// Use of this source code is governed by a BSD-style license that can be
33// found in the LICENSE file.
34
35use crate::readers::Reader;
36use crate::structs::{
37    BlockGroupDesc32, BlockGroupDesc64, DirEntry2, DirEntryHeader, EntryType, Extent, ExtentHeader,
38    ExtentIndex, ExtentTreeNode, FIRST_BG_PADDING, INode, InvalidAddressErrorType, MIN_EXT4_SIZE,
39    MINIMUM_INODE_SIZE, ParseToStruct, ParsingError, ROOT_INODE_NUM, SuperBlock, XattrEntryHeader,
40    XattrHeader,
41};
42use once_cell::sync::OnceCell;
43use std::collections::BTreeMap;
44use std::mem::{size_of, size_of_val};
45use std::path::{Component, Path};
46use std::str;
47use zerocopy::byteorder::little_endian::U32 as LEU32;
48use zerocopy::{IntoBytes, SplitByteSlice};
49
50// Assuming/ensuring that we are on a 64bit system where u64 == usize.
51assert_eq_size!(u64, usize);
52
53pub struct Parser {
54    reader: Box<dyn Reader>,
55    super_block: OnceCell<SuperBlock>,
56}
57
/// Extended attributes of one inode: full attribute name (prefix included) mapped to the raw
/// attribute value.
pub type XattrMap = BTreeMap<Vec<u8>, Vec<u8>>;
59
60/// Abstracts over block group descriptors of different size
61enum BlockGroupDescriptor {
62    BGD32(BlockGroupDesc32),
63    BGD64(BlockGroupDesc64),
64}
65
66impl BlockGroupDescriptor {
67    fn inode_table_block(&self) -> u64 {
68        match self {
69            Self::BGD32(bgd) => u64::from(bgd.ext2bgd_i_tables),
70            Self::BGD64(bgd) => {
71                u64::from(bgd.base.ext2bgd_i_tables) + (u64::from(bgd.ext4bgd_i_tables_hi) << 32)
72            }
73        }
74    }
75}
76
/// EXT4 Parser
///
/// Takes in a boxed `Reader` that is able to read arbitrary chunks of data from the filesystem
/// image.
///
/// Basic use:
/// ```ignore
/// let parser = Parser::new(Box::new(VecReader::new(vec_of_u8)));
/// let root = parser.root_inode()?;
/// let entries = parser.entries_from_inode(&root)?;
/// ```
84impl Parser {
85    pub fn new(reader: Box<dyn Reader>) -> Self {
86        Parser { reader, super_block: OnceCell::new() }
87    }
88
89    /// Returns the Super Block.
90    ///
91    /// If the super block has been parsed and saved before, return that.
92    /// Else, parse the super block and save it and return it.
93    ///
94    /// We never need to re-parse the super block in this read-only
95    /// implementation.
96    fn super_block(&self) -> Result<&SuperBlock, ParsingError> {
97        self.super_block.get_or_try_init(|| SuperBlock::parse(&self.reader))
98    }
99
100    /// Reads block size from the Super Block.
101    pub fn block_size(&self) -> Result<u64, ParsingError> {
102        self.super_block()?.block_size()
103    }
104
105    /// Reads full raw data from a given block number.
106    pub(crate) fn block(&self, block_number: u64) -> Result<Box<[u8]>, ParsingError> {
107        if block_number == 0 {
108            return Err(ParsingError::InvalidAddress(
109                InvalidAddressErrorType::Lower,
110                0,
111                FIRST_BG_PADDING,
112            ));
113        }
114        let block_size = self.block_size()?;
115        let address = block_number
116            .checked_mul(block_size)
117            .ok_or(ParsingError::BlockNumberOutOfBounds(block_number))?;
118
119        let mut data = vec![0u8; block_size.try_into().unwrap()];
120        self.reader.read(address, data.as_mut_slice()).map_err(Into::<ParsingError>::into)?;
121
122        Ok(data.into_boxed_slice())
123    }
124
125    /// Returns the address of the given `inode_number` within `self.reader`.
126    pub(crate) fn inode_addr(&self, inode_number: u32) -> Result<u64, ParsingError> {
127        if inode_number < 1 {
128            // INode number 0 is not allowed per ext4 spec.
129            return Err(ParsingError::InvalidInode(inode_number));
130        }
131        let sb = self.super_block()?;
132        let block_size = self.block_size()?;
133
134        // The first Block Group starts with:
135        // - 1024 byte padding
136        // - 1024 byte Super Block
137        // Then in the next block, there are many blocks worth of Block Group Descriptors.
138        // If the block size is 2048 bytes or larger, then the 1024 byte padding, and the
139        // Super Block both fit in the first block (0), and the Block Group Descriptors start
140        // at block 1.
141        //
142        // A 1024 byte block size means the padding takes block 0 and the Super Block takes
143        // block 1. This means the Block Group Descriptors start in block 2.
144        let bgd_table_offset = if block_size >= MIN_EXT4_SIZE {
145            // Padding and Super Block both fit in the first block, so offset to the next
146            // block.
147            block_size
148        } else {
149            // Block size is less than 2048. The only valid block size smaller than 2048 is 1024.
150            // Padding and Super Block take one block each, so offset to the third block.
151            block_size * 2
152        };
153
154        let bgd_offset = (inode_number - 1) as u64 / sb.e2fs_ipg.get() as u64
155            * sb.block_group_descriptor_size() as u64;
156        let bgd = if sb.is_64bit() {
157            BlockGroupDescriptor::BGD64(BlockGroupDesc64::from_reader_with_offset(
158                &self.reader,
159                bgd_table_offset + bgd_offset,
160            )?)
161        } else {
162            BlockGroupDescriptor::BGD32(BlockGroupDesc32::from_reader_with_offset(
163                &self.reader,
164                bgd_table_offset + bgd_offset,
165            )?)
166        };
167
168        // Offset could really be anywhere, and the Reader will enforce reading within the
169        // filesystem size. Not much can be checked here.
170        let inode_table_offset =
171            (inode_number - 1) as u64 % sb.e2fs_ipg.get() as u64 * sb.e2fs_inode_size.get() as u64;
172        let inode_addr = (bgd.inode_table_block() * block_size) + inode_table_offset;
173        if inode_addr < MIN_EXT4_SIZE {
174            return Err(ParsingError::InvalidAddress(
175                InvalidAddressErrorType::Lower,
176                inode_addr,
177                MIN_EXT4_SIZE,
178            ));
179        }
180        Ok(inode_addr)
181    }
182
183    /// Reads the INode at the given inode number.
184    pub fn inode(&self, inode_number: u32) -> Result<INode, ParsingError> {
185        INode::from_reader_with_offset(&self.reader, self.inode_addr(inode_number)?)
186    }
187
188    /// Helper function to get the root directory INode.
189    pub fn root_inode(&self) -> Result<INode, ParsingError> {
190        self.inode(ROOT_INODE_NUM)
191    }
192
193    /// Reads all raw data from a given extent leaf node.
194    fn extent_data(&self, extent: &Extent, mut allowance: u64) -> Result<Vec<u8>, ParsingError> {
195        let block_number = extent.target_block_num();
196        let block_count = extent.e_len.get() as u64;
197        let block_size = self.block_size()?;
198        let mut read_len;
199
200        let mut data = Vec::with_capacity((block_size * block_count).try_into().unwrap());
201
202        for i in 0..block_count {
203            let block_data = self.block(block_number + i as u64)?;
204            if allowance >= block_size {
205                read_len = block_size;
206            } else {
207                read_len = allowance;
208            }
209            let block_data = &block_data[0..read_len.try_into().unwrap()];
210            data.append(&mut block_data.to_vec());
211            allowance -= read_len;
212        }
213
214        Ok(data)
215    }
216
217    /// Reads the inode size and raw extent data for a regular file.  Fails if the provided inode is
218    /// not a regular file.
219    pub fn read_extents(&self, inode_num: u32) -> Result<(u64, Vec<Extent>), ParsingError> {
220        let inode = self.inode(inode_num)?;
221
222        // Make sure this is a regular file.
223        const IFMT: u16 = 0xf000;
224        const IFREG: u16 = 0x8000;
225        if u16::from(inode.e2di_mode) & IFMT != IFREG {
226            return Err(ParsingError::NotFile);
227        }
228
229        let root_extent_tree_node = inode.extent_tree_node()?;
230        let mut extents = Vec::new();
231
232        self.iterate_extents_in_tree(&root_extent_tree_node, &mut |extent| {
233            extents.push(extent.clone());
234            Ok(())
235        })?;
236
237        Ok((inode.size(), extents))
238    }
239
240    /// Reads extent data from a leaf node.
241    ///
242    /// # Arguments
243    /// * `extent`: Extent from which to read data from.
244    /// * `data`: Vec where data that is read is added.
245    /// * `allowance`: The maximum number of bytes to read from the extent. The
246    ///    given file allowance is updated on each call to track sizing for an
247    ///    entire extent tree.
248    fn read_extent_data(
249        &self,
250        extent: &Extent,
251        data: &mut Vec<u8>,
252        allowance: &mut u64,
253    ) -> Result<(), ParsingError> {
254        let mut extent_data = self.extent_data(&extent, *allowance)?;
255        let extent_len = extent_data.len() as u64;
256        if extent_len > *allowance {
257            return Err(ParsingError::ExtentUnexpectedLength(extent_len, *allowance));
258        }
259        *allowance -= extent_len;
260        data.append(&mut extent_data);
261        Ok(())
262    }
263
264    /// Reads directory entries from an extent leaf node.
265    fn read_dir_entries(
266        &self,
267        extent: &Extent,
268        entries: &mut Vec<DirEntry2>,
269    ) -> Result<(), ParsingError> {
270        let block_size = self.block_size()?;
271        let target_block_offset = extent.target_block_num() * block_size;
272
273        // The `e2d_reclen` of the last entry will be large enough fill the
274        // remaining space of the block.
275        for block_index in 0..extent.e_len.get() {
276            let mut dir_entry_offset = 0u64;
277            while (dir_entry_offset + size_of::<DirEntryHeader>() as u64) < block_size {
278                let offset =
279                    dir_entry_offset + target_block_offset + (block_index as u64 * block_size);
280
281                let de_header = DirEntryHeader::from_reader_with_offset(&self.reader, offset)?;
282                let mut de = DirEntry2 {
283                    e2d_ino: de_header.e2d_ino,
284                    e2d_reclen: de_header.e2d_reclen,
285                    e2d_namlen: de_header.e2d_namlen,
286                    e2d_type: de_header.e2d_type,
287                    e2d_name: [0u8; 255],
288                };
289                self.reader.read(
290                    offset + size_of::<DirEntryHeader>() as u64,
291                    &mut de.e2d_name[..de.e2d_namlen as usize],
292                )?;
293
294                dir_entry_offset += de.e2d_reclen.get() as u64;
295
296                if de.e2d_ino.get() != 0 {
297                    entries.push(de);
298                }
299            }
300        }
301        Ok(())
302    }
303
304    /// Handles an extent tree leaf node by invoking `extent_handler` for each contained extent.
305    fn iterate_extents_in_leaf<B: SplitByteSlice, F: FnMut(&Extent) -> Result<(), ParsingError>>(
306        &self,
307        extent_tree_node: &ExtentTreeNode<B>,
308        extent_handler: &mut F,
309    ) -> Result<(), ParsingError> {
310        for e_index in 0..extent_tree_node.header.eh_ecount.get() {
311            let start = size_of::<Extent>() * e_index as usize;
312            let end = start + size_of::<Extent>() as usize;
313            let e = Extent::to_struct_ref(
314                &(extent_tree_node.entries)[start..end],
315                ParsingError::InvalidExtent(start as u64),
316            )?;
317
318            extent_handler(e)?;
319        }
320
321        Ok(())
322    }
323
324    /// Handles traversal down an extent tree.
325    pub(crate) fn iterate_extents_in_tree<
326        B: SplitByteSlice,
327        F: FnMut(&Extent) -> Result<(), ParsingError>,
328    >(
329        &self,
330        extent_tree_node: &ExtentTreeNode<B>,
331        extent_handler: &mut F,
332    ) -> Result<(), ParsingError> {
333        let block_size = self.block_size()?;
334
335        match extent_tree_node.header.eh_depth.get() {
336            0 => {
337                self.iterate_extents_in_leaf(extent_tree_node, extent_handler)?;
338            }
339            1..=4 => {
340                for e_index in 0..extent_tree_node.header.eh_ecount.get() {
341                    let start: usize = size_of::<Extent>() * e_index as usize;
342                    let end = start + size_of::<Extent>();
343                    let e = ExtentIndex::to_struct_ref(
344                        &(extent_tree_node.entries)[start..end],
345                        ParsingError::InvalidExtent(start as u64),
346                    )?;
347
348                    let next_level_offset = e.target_block_num() as u64 * block_size;
349
350                    let next_extent_header =
351                        ExtentHeader::from_reader_with_offset(&self.reader, next_level_offset)?;
352
353                    let entry_count = next_extent_header.eh_ecount.get() as usize;
354                    let entry_size = match next_extent_header.eh_depth.get() {
355                        0 => size_of::<Extent>(),
356                        _ => size_of::<ExtentIndex>(),
357                    };
358                    let node_size = size_of::<ExtentHeader>() + (entry_count * entry_size);
359
360                    let mut data = vec![0u8; node_size];
361                    self.reader.read(next_level_offset, data.as_mut_slice())?;
362
363                    let next_level_node = ExtentTreeNode::parse(data.as_slice())
364                        .ok_or(ParsingError::InvalidExtent(next_level_offset))?;
365
366                    self.iterate_extents_in_tree(&next_level_node, extent_handler)?;
367                }
368            }
369            _ => return Err(ParsingError::InvalidExtentHeader),
370        };
371
372        Ok(())
373    }
374
375    /// Lists directory entries from the directory that is the given Inode.
376    ///
377    /// Errors if the Inode does not map to a Directory.
378    pub fn entries_from_inode(&self, inode: &INode) -> Result<Vec<DirEntry2>, ParsingError> {
379        let root_extent_tree_node = inode.extent_tree_node()?;
380        let mut dir_entries = Vec::new();
381
382        self.iterate_extents_in_tree(&root_extent_tree_node, &mut |extent| {
383            self.read_dir_entries(extent, &mut dir_entries)
384        })?;
385
386        Ok(dir_entries)
387    }
388
389    /// Gets any DirEntry2 that isn't root.
390    ///
391    /// Root doesn't have a DirEntry2.
392    ///
393    /// When dynamic loading of files is supported, this is the required mechanism.
394    pub fn entry_at_path(&self, path: &Path) -> Result<DirEntry2, ParsingError> {
395        let root_inode = self.root_inode()?;
396        let root_entries = self.entries_from_inode(&root_inode)?;
397        let mut entry_map = DirEntry2::as_hash_map(root_entries)?;
398
399        let mut components = path.components().peekable();
400        let mut component = components.next();
401
402        while component != None {
403            match component {
404                Some(Component::RootDir) => {
405                    // Skip
406                }
407                Some(Component::Normal(name)) => {
408                    let name = name.to_str().ok_or(ParsingError::InvalidInputPath)?;
409                    if let Some(entry) = entry_map.remove(name) {
410                        if components.peek() == None {
411                            return Ok(entry);
412                        }
413                        match EntryType::from_u8(entry.e2d_type)? {
414                            EntryType::Directory => {
415                                let inode = self.inode(entry.e2d_ino.get())?;
416                                entry_map =
417                                    DirEntry2::as_hash_map(self.entries_from_inode(&inode)?)?;
418                            }
419                            _ => {
420                                break;
421                            }
422                        }
423                    }
424                }
425                _ => {
426                    break;
427                }
428            }
429            component = components.next();
430        }
431
432        match path.to_str() {
433            Some(s) => Err(ParsingError::PathNotFound(s.to_string())),
434            None => Err(ParsingError::PathNotFound(
435                "Bad path - was not able to convert into string".to_string(),
436            )),
437        }
438    }
439
440    /// Reads all raw data for a given inode.
441    ///
442    /// For a file, this will be the file data. For a symlink,
443    /// this will be the symlink target.
444    pub fn read_data(&self, inode_num: u32) -> Result<Vec<u8>, ParsingError> {
445        let inode = self.inode(inode_num)?;
446        let mut size_remaining = inode.size();
447        let mut data = Vec::with_capacity(size_remaining.try_into().unwrap());
448
449        // Check for symlink with inline data.
450        if u16::from(inode.e2di_mode) & 0xa000 != 0 && u32::from(inode.e2di_nblock) == 0 {
451            data.extend_from_slice(&inode.e2di_blocks[..inode.size().try_into().unwrap()]);
452            return Ok(data);
453        }
454
455        let root_extent_tree_node = inode.extent_tree_node()?;
456        let mut extents = Vec::new();
457
458        self.iterate_extents_in_tree(&root_extent_tree_node, &mut |extent| {
459            extents.push(extent.clone());
460            Ok(())
461        })?;
462
463        let block_size = self.block_size()?;
464
465        // Summarized from https://www.kernel.org/doc/ols/2007/ols2007v2-pages-21-34.pdf,
466        // Section 2.2: Extent and ExtentHeader entries must be sorted by logical block number. This
467        // enforces that when the extent tree is traversed depth first that a list of extents sorted
468        // by logical block number is produced. This is a requirement to produce the proper ordering
469        // of bytes within `data` here.
470        for extent in extents {
471            let buffer_offset = extent.e_blk.get() as u64 * block_size;
472
473            // File may be sparse. Sparse files will have gaps
474            // between logical blocks. Fill in any gaps with zeros.
475            if buffer_offset > data.len() as u64 {
476                size_remaining -= buffer_offset - data.len() as u64;
477                data.resize(buffer_offset.try_into().unwrap(), 0);
478            }
479
480            self.read_extent_data(&extent, &mut data, &mut size_remaining)?;
481        }
482
483        // If there are zero pages at the end of the file, they won't appear in the extents list.
484        // Pad the data with zeroes to the full file length.
485        // TODO(https://fxbug.dev/42073237): Add a test for this behavior, once better test infra exists.
486        data.resize(inode.size().try_into().unwrap(), 0);
487        Ok(data)
488    }
489
490    /// Progress through the entire directory tree starting from the given INode.
491    ///
492    /// If given the root directory INode, this will process through every directory entry in the
493    /// filesystem in a DFS manner.
494    ///
495    /// Takes in a closure that will be called for each entry found.
496    /// Closure should return `Ok(true)` in order to continue the process, otherwise the process
497    /// will stop.
498    ///
499    /// Returns Ok(true) if it has indexed its subtree successfully. Otherwise, if the receiver
500    /// chooses to cancel indexing early, an Ok(false) is returned and propagated up.
501    pub fn index<R, E>(&self, inode: INode, prefix: Vec<&str>, receiver: &mut R) -> Result<bool, E>
502    where
503        E: From<ParsingError>,
504        R: FnMut(&Parser, Vec<&str>, &DirEntry2) -> Result<bool, E>,
505    {
506        let entries = self.entries_from_inode(&inode)?;
507        for entry in entries {
508            let entry_name = entry.name()?;
509            if entry_name == "." || entry_name == ".." {
510                continue;
511            }
512            let mut name = Vec::new();
513            name.append(&mut prefix.clone());
514            name.push(entry_name);
515            if !receiver(self, name.clone(), &entry)? {
516                return Ok(false);
517            }
518            if EntryType::from_u8(entry.e2d_type)? == EntryType::Directory {
519                let inode = self.inode(entry.e2d_ino.get())?;
520                if !self.index(inode, name, receiver)? {
521                    return Ok(false);
522                }
523            }
524        }
525
526        Ok(true)
527    }
528
529    /// Returns the xattrs associated with `inode_number`.
530    pub fn inode_xattrs(&self, inode_number: u32) -> Result<XattrMap, ParsingError> {
531        let mut xattrs = BTreeMap::new();
532
533        let inode_addr = self.inode_addr(inode_number).expect("Couldn't get inode address");
534        let inode =
535            INode::from_reader_with_offset(&self.reader, inode_addr).expect("Failed reader");
536
537        let sb = self.super_block().expect("No super block for inode");
538        let xattr_magic_addr = inode_addr
539            + MINIMUM_INODE_SIZE
540            + u64::from(inode.e4di_extra_isize(sb).unwrap_or_default());
541
542        let mut magic = LEU32::ZERO;
543        self.reader.read(xattr_magic_addr, magic.as_mut_bytes()).expect("Failed to read xattr");
544        if magic.get() == Self::XATTR_MAGIC {
545            let first_entry = xattr_magic_addr + size_of_val(&magic) as u64;
546            self.read_xattr_entries_from_inode(
547                first_entry,
548                inode_addr + (sb.e2fs_inode_size.get() as u64),
549                &mut xattrs,
550            )?;
551        }
552
553        let block_number: u64 = inode.facl();
554        if block_number > 0 {
555            let block = self.block(block_number).expect("Couldn't find block");
556            Self::read_xattr_entries_from_block(&block, &mut xattrs)?;
557        }
558
559        Ok(xattrs)
560    }
561
562    const XATTR_ALIGNMENT: u64 = 4;
563    const XATTR_MAGIC: u32 = 0xea020000;
564
565    fn round_up_to_align(x: u64, align: u64) -> u64 {
566        let spare = x % align;
567        if spare > 0 { x.checked_add(align - spare).expect("Overflow when aligning") } else { x }
568    }
569
570    fn is_valid_xattr_entry_header(header: &XattrEntryHeader) -> bool {
571        !(header.e_name_len == 0
572            && header.e_name_index == 0
573            && header.e_value_offs.get() == 0
574            && header.e_value_inum.get() == 0)
575    }
576
577    fn xattr_prefix_for_name_index(header: &XattrEntryHeader) -> Vec<u8> {
578        match header.e_name_index {
579            1 => b"user.".to_vec(),
580            2 => b"system.posix_acl_access.".to_vec(),
581            3 => b"system.posix_acl_default.".to_vec(),
582            4 => b"trusted.".to_vec(),
583            6 => b"security.".to_vec(),
584            7 => b"system.".to_vec(),
585            8 => b"system.richacl".to_vec(),
586            _ => b"".to_vec(),
587        }
588    }
589
590    /// Reads all the xattr entries, stored in the inode, from `entries_addr` into `xattrs`.
591    fn read_xattr_entries_from_inode(
592        &self,
593        mut entries_addr: u64,
594        inode_end: u64,
595        xattrs: &mut XattrMap,
596    ) -> Result<(), ParsingError> {
597        let value_base_addr = entries_addr;
598        while entries_addr + (std::mem::size_of::<XattrEntryHeader>() as u64) < inode_end {
599            let head = XattrEntryHeader::from_reader_with_offset(&self.reader, entries_addr)?;
600            if !Self::is_valid_xattr_entry_header(&head) {
601                break;
602            }
603
604            let prefix = Self::xattr_prefix_for_name_index(&head);
605            let mut name = Vec::with_capacity(prefix.len() + head.e_name_len as usize);
606            name.extend_from_slice(&prefix);
607            name.resize(prefix.len() + head.e_name_len as usize, 0);
608
609            self.reader.read(
610                entries_addr + size_of::<XattrEntryHeader>() as u64,
611                &mut name[prefix.len()..],
612            )?;
613
614            let mut value = vec![0u8; head.e_value_size.get() as usize];
615            self.reader.read(value_base_addr + u64::from(head.e_value_offs), &mut value)?;
616            xattrs.insert(name, value);
617
618            entries_addr += size_of::<XattrEntryHeader>() as u64 + head.e_name_len as u64;
619            entries_addr = Self::round_up_to_align(entries_addr, Self::XATTR_ALIGNMENT);
620        }
621        Ok(())
622    }
623
624    /// Reads all the xattr entries, stored in the inode, from `entries_addr` into `xattrs`.
625    fn read_xattr_entries_from_block(
626        block: &[u8],
627        xattrs: &mut XattrMap,
628    ) -> Result<(), ParsingError> {
629        let head = XattrHeader::to_struct_ref(
630            &block[..std::mem::size_of::<XattrHeader>()],
631            ParsingError::Incompatible("Invalid XattrHeader".to_string()),
632        )?;
633
634        if head.e_magic.get() != Self::XATTR_MAGIC {
635            return Ok(());
636        }
637
638        let mut offset = Self::round_up_to_align(
639            std::mem::size_of::<XattrHeader>() as u64,
640            Self::XATTR_ALIGNMENT * 2,
641        ) as usize;
642
643        while offset + std::mem::size_of::<XattrEntryHeader>() < block.len() {
644            let head = XattrEntryHeader::to_struct_ref(
645                &block[offset..offset + std::mem::size_of::<XattrEntryHeader>()],
646                ParsingError::Incompatible("Invalid XattrEntryHeader".to_string()),
647            )?;
648
649            if !Self::is_valid_xattr_entry_header(&head) {
650                break;
651            }
652
653            let name_start = offset + std::mem::size_of::<XattrEntryHeader>();
654            let name_end = name_start + head.e_name_len as usize;
655            let mut name = Self::xattr_prefix_for_name_index(&head);
656            name.extend_from_slice(&block[name_start..name_end]);
657
658            let value_start = head.e_value_offs.get() as usize;
659            let value_end = value_start + head.e_value_size.get() as usize;
660            let value = block[value_start..value_end].to_vec();
661            xattrs.insert(name, value);
662
663            offset = Self::round_up_to_align(name_end as u64, 4) as usize;
664        }
665
666        Ok(())
667    }
668}
669
670#[cfg(test)]
671mod tests {
672    use crate::parser::Parser;
673    use crate::readers::VecReader;
674    use crate::structs::{EntryType, ParsingError};
675    use maplit::hashmap;
676    use sha2::{Digest, Sha256};
677    use std::collections::{HashMap, HashSet};
678    use std::path::Path;
679    use std::{fs, str};
680    use test_case::test_case;
681
682    #[fuchsia::test]
683    fn list_root_1_file() {
684        let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
685        let parser = Parser::new(Box::new(VecReader::new(data)));
686        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
687        let root_inode = parser.root_inode().expect("Parse INode");
688        let entries = parser.entries_from_inode(&root_inode).expect("List entries");
689        let mut expected_entries = vec!["file1", "lost+found", "..", "."];
690
691        for de in &entries {
692            assert_eq!(expected_entries.pop().unwrap(), de.name().unwrap());
693        }
694        assert_eq!(expected_entries.len(), 0);
695    }
696
697    #[test_case(
698        "/pkg/data/nest.img",
699        vec!["inner", "file1", "lost+found", "..", "."];
700        "fs with a single directory")]
701    #[test_case(
702        "/pkg/data/extents.img",
703        vec!["trailingzeropages", "a", "smallfile", "largefile", "sparsefile", "lost+found", "..", "."];
704        "fs with multiple files with multiple extents")]
705    fn list_root(ext4_path: &str, mut expected_entries: Vec<&str>) {
706        let data = fs::read(ext4_path).expect("Unable to read file");
707        let parser = Parser::new(Box::new(VecReader::new(data)));
708        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
709        let root_inode = parser.root_inode().expect("Parse INode");
710        let entries = parser.entries_from_inode(&root_inode).expect("List entries");
711
712        for de in &entries {
713            assert_eq!(expected_entries.pop().unwrap(), de.name().unwrap());
714        }
715        assert_eq!(expected_entries.len(), 0);
716    }
717
718    #[fuchsia::test]
719    fn get_from_path() {
720        let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
721        let parser = Parser::new(Box::new(VecReader::new(data)));
722        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
723
724        let entry = parser.entry_at_path(Path::new("/inner")).expect("Entry at path");
725        assert_eq!(entry.e2d_ino.get(), 12);
726        assert_eq!(entry.name().unwrap(), "inner");
727
728        let entry = parser.entry_at_path(Path::new("/inner/file2")).expect("Entry at path");
729        assert_eq!(entry.e2d_ino.get(), 17);
730        assert_eq!(entry.name().unwrap(), "file2");
731    }
732
733    #[fuchsia::test]
734    fn read_data() {
735        let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
736        let parser = Parser::new(Box::new(VecReader::new(data)));
737        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
738
739        let entry = parser.entry_at_path(Path::new("file1")).expect("Entry at path");
740        assert_eq!(entry.e2d_ino.get(), 15);
741        assert_eq!(entry.name().unwrap(), "file1");
742
743        let data = parser.read_data(entry.e2d_ino.into()).expect("File data");
744        let compare = "file1 contents.\n";
745        assert_eq!(data.len(), compare.len());
746        assert_eq!(str::from_utf8(data.as_slice()).expect("File data"), compare);
747    }
748
749    #[fuchsia::test]
750    fn fail_inode_zero() {
751        let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
752        let parser = Parser::new(Box::new(VecReader::new(data)));
753        assert!(parser.inode(0).is_err());
754    }
755
756    #[fuchsia::test]
757    fn index() {
758        let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
759        let parser = Parser::new(Box::new(VecReader::new(data)));
760        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
761
762        let mut count = 0;
763        let mut entries: HashSet<u32> = HashSet::new();
764        let root_inode = parser.root_inode().expect("Root inode");
765
766        parser
767            .index(root_inode, Vec::new(), &mut |_, _, entry| {
768                count += 1;
769
770                // Make sure each inode only appears once.
771                assert_ne!(entries.contains(&entry.e2d_ino.get()), true);
772                entries.insert(entry.e2d_ino.get());
773
774                Ok::<bool, ParsingError>(true)
775            })
776            .expect("Index");
777
778        assert_eq!(count, 4);
779    }
780
781    #[fuchsia::test]
782    fn xattr() {
783        let data = fs::read("/pkg/data/xattr.img").expect("Unable to read file");
784        let parser = Parser::new(Box::new(VecReader::new(data)));
785        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
786        let root_inode = parser.root_inode().expect("Root inode");
787        let mut found_files = HashSet::new();
788
789        parser
790            .index(root_inode, Vec::new(), &mut |_, _, entry| {
791                let name = entry.e2d_name;
792                let inode = entry.e2d_ino.get();
793                let attributes = parser.inode_xattrs(inode).expect("Extended attributes");
794                match name {
795                    name if &name[0..10] == b"lost+found" => {
796                        assert_eq!(attributes.len(), 0);
797                        found_files.insert("lost+found");
798                    }
799                    name if &name[0..5] == b"file1" => {
800                        assert_eq!(attributes.len(), 1);
801                        assert_eq!(attributes[&b"user.test".to_vec()], b"test value".to_vec());
802                        found_files.insert("file1");
803                    }
804                    name if &name[0..9] == b"file_many" => {
805                        assert_eq!(attributes.len(), 6);
806                        assert_eq!(
807                            attributes[&b"user.long".to_vec()],
808                            b"vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv".to_vec()
809                        );
810                        found_files.insert("file_many");
811                    }
812                    name if &name[0..6] == b"subdir" => {
813                        assert_eq!(attributes.len(), 1);
814                        assert_eq!(attributes[&b"user.type".to_vec()], b"dir".to_vec());
815                        found_files.insert("subdir");
816                    }
817                    name if &name[0..5] == b"file2" => {
818                        assert_eq!(attributes.len(), 2);
819                        assert_eq!(
820                            attributes[&b"user.test_one".to_vec()],
821                            b"test value 1".to_vec()
822                        );
823                        assert_eq!(
824                            attributes[&b"user.test_two".to_vec()],
825                            b"test value 2".to_vec()
826                        );
827                        found_files.insert("file2");
828                    }
829                    _ => {}
830                }
831                Ok::<bool, ParsingError>(true)
832            })
833            .expect("Index");
834
835        assert_eq!(found_files.len(), 5);
836    }
837
838    #[test_case(
839        "/pkg/data/extents.img",
840        hashmap!{
841            "largefile".to_string() => "de2cf635ae4e0e727f1e412f978001d6a70d2386dc798d4327ec8c77a8e4895d".to_string(),
842            "smallfile".to_string() => "5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03".to_string(),
843            "sparsefile".to_string() => "3f411e42c1417cd8845d7144679812be3e120318d843c8c6e66d8b2c47a700e9".to_string(),
844            "trailingzeropages".to_string() => "afc5cc689fd3cb8d00c147d60dc911a70d36b7afb03cc7f15de9c78a52be978d".to_string(),
845            "a/multi/dir/path/within/this/crowded/extents/test/img/empty".to_string() => "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string(),
846        },
847        vec!["a/multi/dir/path/within/this/crowded/extents/test/img", "lost+found"];
848        "fs with multiple files with multiple extents")]
849    #[test_case(
850        "/pkg/data/1file.img",
851        hashmap!{
852            "file1".to_string() => "6bc35bfb2ca96c75a1fecde205693c19a827d4b04e90ace330048f3e031487dd".to_string(),
853        },
854        vec!["lost+found"];
855        "fs with one small file")]
856    #[test_case(
857        "/pkg/data/nest.img",
858        hashmap!{
859            "file1".to_string() => "6bc35bfb2ca96c75a1fecde205693c19a827d4b04e90ace330048f3e031487dd".to_string(),
860            "inner/file2".to_string() => "215ca145cbac95c9e2a6f5ff91ca1887c837b18e5f58fd2a7a16e2e5a3901e10".to_string(),
861        },
862        vec!["inner", "lost+found"];
863        "fs with a single directory")]
864    #[test_case(
865        "/pkg/data/nest64.img",
866        hashmap!{
867            "file1".to_string() => "6bc35bfb2ca96c75a1fecde205693c19a827d4b04e90ace330048f3e031487dd".to_string(),
868            "inner/file2".to_string() => "215ca145cbac95c9e2a6f5ff91ca1887c837b18e5f58fd2a7a16e2e5a3901e10".to_string(),
869        },
870        vec!["inner", "lost+found"];
871        "fs with 64bit enabled and a single directory")]
872    #[test_case(
873        "/pkg/data/longdir.img",
874        {
875            let mut hash = HashMap::new();
876            for i in 1..=1000 {
877                hash.insert(i.to_string(), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string());
878            }
879            hash
880        },
881        vec!["lost+found"];
882        "fs with many entries in a directory")]
883    fn check_data(
884        ext4_path: &str,
885        mut file_hashes: HashMap<String, String>,
886        expected_dirs: Vec<&str>,
887    ) {
888        let data = fs::read(ext4_path).expect("Unable to read file");
889        let parser = Parser::new(Box::new(VecReader::new(data)));
890        assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
891
892        let root_inode = parser.root_inode().expect("Root inode");
893
894        parser
895            .index(root_inode, Vec::new(), &mut |my_self, path, entry| {
896                let entry_type = EntryType::from_u8(entry.e2d_type).expect("Entry Type");
897                let file_path = path.join("/");
898
899                match entry_type {
900                    EntryType::RegularFile => {
901                        let data = my_self.read_data(entry.e2d_ino.into()).expect("File data");
902
903                        let mut hasher = Sha256::new();
904                        hasher.update(&data);
905                        assert_eq!(
906                            file_hashes.remove(&file_path).unwrap(),
907                            hex::encode(hasher.finalize())
908                        );
909                    }
910                    EntryType::Directory => {
911                        let mut found = false;
912
913                        // These should be the only possible directories.
914                        for expected_dir in expected_dirs.iter() {
915                            if expected_dir.starts_with(&file_path) {
916                                found = true;
917                                break;
918                            }
919                        }
920                        assert!(found, "Unexpected path {}", file_path);
921                    }
922                    _ => {
923                        assert!(false, "No other types should exist in this image.");
924                    }
925                }
926                Ok::<bool, ParsingError>(true)
927            })
928            .expect("Index");
929        assert!(file_hashes.is_empty(), "Expected files were not found {:?}", file_hashes);
930    }
931}