ext4_metadata/
lib.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::sorted_vec_map::SortedVecMap;
6use flyweights::{FlyByteStr, FlyStr};
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, HashSet};
9use std::sync::Arc;
10use thiserror::Error;
11
12mod sorted_vec_map;
13
14// This is deliberately the same as ext4.
15pub const ROOT_INODE_NUM: u64 = 2;
16
17pub const S_IFDIR: u16 = 0x4000;
18pub const S_IFREG: u16 = 0x8000;
19pub const S_IFLNK: u16 = 0xa000;
20pub const S_IFMT: u16 = 0xf000;
21
22#[derive(Error, Debug)]
23pub enum MetadataError {
24    #[error("Node not found")]
25    NotFound,
26    #[error("Node is not a directory")]
27    NotDir,
28    #[error("Failed to deserialize metadata (corrupt?)")]
29    FailedToDeserialize,
30}
31
32#[derive(Serialize, Deserialize)]
33pub struct Metadata {
34    nodes: SortedVecMap<u64, Node>,
35}
36
37impl Metadata {
38    pub fn new() -> Self {
39        Self { nodes: SortedVecMap::new() }
40    }
41
42    /// Finds the node with name `name` in directory with inode number `parent`.
43    pub fn lookup(&self, parent: u64, name: &str) -> Result<u64, MetadataError> {
44        self.nodes
45            .get(&parent)
46            .ok_or(MetadataError::NotFound)?
47            .directory()
48            .ok_or(MetadataError::NotDir)?
49            .children
50            .get(&FlyStr::from(name))
51            .ok_or(MetadataError::NotFound)
52            .cloned()
53    }
54
55    /// Returns the node with inode number `inode_num`.
56    pub fn get(&self, inode_num: u64) -> Option<&Node> {
57        self.nodes.get(&inode_num)
58    }
59
60    /// Deserializes the metadata.
61    pub fn deserialize(bytes: &[u8]) -> Result<Self, MetadataError> {
62        let mut metadata: Self =
63            bincode::deserialize(bytes).map_err(|_| MetadataError::FailedToDeserialize)?;
64        metadata.dedup_xattrs();
65        Ok(metadata)
66    }
67
68    /// Serializes the metadata.
69    pub fn serialize(&self) -> Vec<u8> {
70        bincode::serialize(&self).unwrap()
71    }
72
73    /// Add a child at `path` with inode number `inode_num`.
74    pub fn add_child(&mut self, path: &[&str], inode_num: u64) {
75        for component in path {
76            assert!(
77                *component != "."
78                    && *component != ".."
79                    && !component.contains(|c| c == '/' || c == '\0'),
80                "Invalid path component {component:?} in {path:?}"
81            );
82        }
83
84        let mut node = self.nodes.get_mut(&ROOT_INODE_NUM).unwrap().directory_mut().unwrap();
85        let mut iter = path.iter();
86        let name = *iter.next_back().unwrap();
87        for component in iter {
88            let inode_num = *node.children.get(&FlyStr::from(*component)).unwrap();
89            node = self.nodes.get_mut(&inode_num).unwrap().directory_mut().unwrap();
90        }
91        node.children.insert(name.into(), inode_num);
92    }
93
94    /// Inserts a directory node.  This will not add a child to a directory; see `add_child`.
95    pub fn insert_directory(
96        &mut self,
97        inode_num: u64,
98        mode: u16,
99        uid: u16,
100        gid: u16,
101        extended_attributes: ExtendedAttributes,
102    ) {
103        assert_eq!(mode & S_IFMT, S_IFDIR);
104        self.nodes.insert(
105            inode_num,
106            Node {
107                info: NodeInfo::Directory(Directory { children: SortedVecMap::new() }),
108                mode,
109                uid,
110                gid,
111                extended_attributes,
112            },
113        );
114    }
115
116    /// Inserts a file node.  This will not add a child to a directory; see `add_child`.
117    pub fn insert_file(
118        &mut self,
119        inode_num: u64,
120        mode: u16,
121        uid: u16,
122        gid: u16,
123        extended_attributes: ExtendedAttributes,
124    ) {
125        assert_eq!(mode & S_IFMT, S_IFREG);
126        self.nodes.insert(
127            inode_num,
128            Node { info: NodeInfo::File(File), mode, uid, gid, extended_attributes },
129        );
130    }
131
132    /// Inserts a symlink node.  This will not add a child to a directory; see `add_child`.
133    pub fn insert_symlink(
134        &mut self,
135        inode_num: u64,
136        target: String,
137        mode: u16,
138        uid: u16,
139        gid: u16,
140        extended_attributes: ExtendedAttributes,
141    ) {
142        assert_eq!(mode & S_IFMT, S_IFLNK);
143        self.nodes.insert(
144            inode_num,
145            Node {
146                info: NodeInfo::Symlink(Symlink { target: target.into() }),
147                mode,
148                uid,
149                gid,
150                extended_attributes,
151            },
152        );
153    }
154
155    /// Nodes will often have the exact same set of extended attributes as other nodes. For this
156    /// reason, entire sets of extended attributes are reference counted and shared between nodes.
157    /// The serialized format of the metadata writes out the extended attributes along with each
158    /// node. After deserializing the metadata, each node will contain its own copy of the extended
159    /// attributes. This function scans all of the nodes and deduplicates the extended attributes to
160    /// save memory.
161    fn dedup_xattrs(&mut self) {
162        let mut xattrs: HashSet<Arc<SortedVecMap<FlyByteStr, FlyByteStr>>> = HashSet::new();
163        for node in self.nodes.values_mut() {
164            match xattrs.get(node.extended_attributes.0.as_ref()) {
165                Some(existing) => {
166                    node.extended_attributes.0 = existing.clone();
167                }
168                None => {
169                    xattrs.insert(node.extended_attributes.0.clone());
170                }
171            }
172        }
173    }
174}
175
176#[derive(Default, Clone, PartialEq, Eq, Debug)]
177pub struct ExtendedAttributes(Arc<SortedVecMap<FlyByteStr, FlyByteStr>>);
178
179impl<'de> serde::Deserialize<'de> for ExtendedAttributes {
180    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
181    where
182        D: serde::Deserializer<'de>,
183    {
184        Ok(Self(Arc::new(SortedVecMap::deserialize(deserializer)?)))
185    }
186}
187
188impl serde::Serialize for ExtendedAttributes {
189    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
190    where
191        S: serde::Serializer,
192    {
193        self.0.as_ref().serialize(serializer)
194    }
195}
196
197impl FromIterator<(FlyByteStr, FlyByteStr)> for ExtendedAttributes {
198    fn from_iter<T: IntoIterator<Item = (FlyByteStr, FlyByteStr)>>(iter: T) -> Self {
199        ExtendedAttributes(Arc::new(SortedVecMap::from_iter(iter)))
200    }
201}
202
203impl<const N: usize> From<[(FlyByteStr, FlyByteStr); N]> for ExtendedAttributes {
204    fn from(value: [(FlyByteStr, FlyByteStr); N]) -> Self {
205        value.into_iter().collect()
206    }
207}
208
209impl From<BTreeMap<Box<[u8]>, Box<[u8]>>> for ExtendedAttributes {
210    fn from(value: BTreeMap<Box<[u8]>, Box<[u8]>>) -> Self {
211        value.into_iter().map(|(k, v)| (k.into(), v.into())).collect()
212    }
213}
214
215impl ExtendedAttributes {
216    pub fn get(&self, key: &[u8]) -> Option<&FlyByteStr> {
217        self.0.get(&FlyByteStr::from(key))
218    }
219
220    pub fn keys(&self) -> impl Iterator<Item = &FlyByteStr> {
221        self.0.keys()
222    }
223
224    pub fn is_empty(&self) -> bool {
225        self.0.is_empty()
226    }
227}
228
229#[derive(Serialize, Deserialize)]
230pub struct Node {
231    info: NodeInfo,
232    pub mode: u16,
233    pub uid: u16,
234    pub gid: u16,
235    pub extended_attributes: ExtendedAttributes,
236}
237
238impl Node {
239    pub fn info(&self) -> &NodeInfo {
240        &self.info
241    }
242
243    pub fn directory(&self) -> Option<&Directory> {
244        match &self.info {
245            NodeInfo::Directory(d) => Some(d),
246            _ => None,
247        }
248    }
249
250    pub fn directory_mut(&mut self) -> Option<&mut Directory> {
251        match &mut self.info {
252            NodeInfo::Directory(d) => Some(d),
253            _ => None,
254        }
255    }
256
257    pub fn file(&self) -> Option<&File> {
258        match &self.info {
259            NodeInfo::File(f) => Some(f),
260            _ => None,
261        }
262    }
263
264    pub fn symlink(&self) -> Option<&Symlink> {
265        match &self.info {
266            NodeInfo::Symlink(s) => Some(s),
267            _ => None,
268        }
269    }
270}
271
272#[derive(Debug, Serialize, Deserialize)]
273pub enum NodeInfo {
274    Directory(Directory),
275    File(File),
276    Symlink(Symlink),
277}
278
279#[derive(Debug, Serialize, Deserialize)]
280pub struct Directory {
281    pub children: SortedVecMap<FlyStr, u64>,
282}
283
284#[derive(Debug, Serialize, Deserialize)]
285pub struct File;
286
287#[derive(Debug, Serialize, Deserialize)]
288pub struct Symlink {
289    pub target: FlyStr,
290}
291
292#[cfg(test)]
293mod tests {
294    use super::{
295        ExtendedAttributes, Metadata, NodeInfo, ROOT_INODE_NUM, S_IFDIR, S_IFLNK, S_IFREG,
296    };
297    use assert_matches::assert_matches;
298    use std::collections::BTreeMap;
299    use std::sync::Arc;
300
301    #[test]
302    fn test_serialize_and_deserialize() {
303        let mut m = Metadata::new();
304        let xattr: ExtendedAttributes =
305            [((*b"a").into(), (*b"apple").into()), ((*b"b").into(), (*b"ball").into())].into();
306        m.insert_directory(ROOT_INODE_NUM, S_IFDIR | 0o755, 2, 3, Default::default());
307        m.insert_directory(3, S_IFDIR | 0o775, 2, 3, xattr.clone());
308        m.add_child(&["foo"], 3);
309        m.insert_file(4, S_IFREG | 0o644, 2, 3, xattr.clone());
310        m.add_child(&["foo", "bar"], 4);
311        m.insert_symlink(5, "symlink-target".to_string(), S_IFLNK | 0o777, 2, 3, xattr.clone());
312        m.add_child(&["foo", "baz"], 5);
313
314        let m = Metadata::deserialize(&m.serialize()).expect("deserialize failed");
315        let node = m.get(ROOT_INODE_NUM).expect("root not found");
316        assert_matches!(node.info(), NodeInfo::Directory(_));
317        assert_eq!(node.mode, S_IFDIR | 0o755);
318        assert_eq!(node.uid, 2);
319        assert_eq!(node.gid, 3);
320        assert!(node.extended_attributes.is_empty());
321
322        assert_eq!(m.lookup(ROOT_INODE_NUM, "foo").expect("foo not found"), 3);
323        let node = m.get(3).expect("root not found");
324        assert_matches!(node.info(), NodeInfo::Directory(_));
325        assert_eq!(node.mode, S_IFDIR | 0o775);
326        assert_eq!(node.uid, 2);
327        assert_eq!(node.gid, 3);
328        assert_eq!(&node.extended_attributes, &xattr);
329
330        assert_eq!(m.lookup(3, "bar").expect("foo/bar not found"), 4);
331        let node = m.get(4).expect("root not found");
332        assert_matches!(node.info(), NodeInfo::File(_));
333        assert_eq!(node.mode, S_IFREG | 0o644);
334        assert_eq!(node.uid, 2);
335        assert_eq!(node.gid, 3);
336        assert_eq!(&node.extended_attributes, &xattr);
337
338        assert_eq!(m.lookup(3, "baz").expect("foo/baz not found"), 5);
339        let node = m.get(5).expect("root not found");
340        assert_matches!(node.info(), NodeInfo::Symlink(_));
341        assert_eq!(node.mode, S_IFLNK | 0o777);
342        assert_eq!(node.uid, 2);
343        assert_eq!(node.gid, 3);
344        assert_eq!(&node.extended_attributes, &xattr);
345    }
346
347    #[test]
348    fn test_serialization_is_deterministic() {
349        // Builds a Metadata instance with fixed contents.
350        let build_fs = || {
351            let mut m = Metadata::new();
352            let xattr: ExtendedAttributes =
353                [((*b"a").into(), (*b"apple").into()), ((*b"b").into(), (*b"ball").into())].into();
354            m.insert_directory(ROOT_INODE_NUM, S_IFDIR | 0o755, 2, 3, Default::default());
355            m.insert_directory(3, S_IFDIR | 0o775, 2, 3, xattr.clone());
356            m.add_child(&["foo"], 3);
357            m.insert_file(4, S_IFREG | 0o644, 2, 3, xattr.clone());
358            m.add_child(&["foo", "bar"], 4);
359            m.insert_symlink(5, "symlink-target".to_string(), S_IFLNK | 0o777, 2, 3, xattr.clone());
360            m.add_child(&["foo", "baz"], 5);
361            m
362        };
363
364        // Build it twice and verify that the serialized representations match.
365        let data1 = build_fs().serialize();
366        let data2 = build_fs().serialize();
367        assert_eq!(data1, data2);
368    }
369
370    #[test]
371    fn test_dedup_extended_attributes() {
372        let mut m = Metadata::new();
373        m.insert_directory(ROOT_INODE_NUM, S_IFDIR | 0o755, 2, 3, Default::default());
374        m.insert_file(3, S_IFREG, 2, 3, [("a".into(), "b".into())].into());
375        m.add_child(&["foo"], 3);
376        m.insert_file(4, S_IFREG, 2, 3, [("a".into(), "b".into())].into());
377        m.add_child(&["bar"], 4);
378
379        // The extended attributes are equal but they haven't been deduplicated yet.
380        assert_eq!(
381            m.get(3).unwrap().extended_attributes.0,
382            m.get(4).unwrap().extended_attributes.0
383        );
384        assert!(!Arc::ptr_eq(
385            &m.get(3).unwrap().extended_attributes.0,
386            &m.get(4).unwrap().extended_attributes.0
387        ));
388
389        m.dedup_xattrs();
390        // The extended attributes are now deduplicated.
391        assert!(Arc::ptr_eq(
392            &m.get(3).unwrap().extended_attributes.0,
393            &m.get(4).unwrap().extended_attributes.0
394        ));
395
396        // The extended attributes are duplicated in the serialized data but get deduplicated again
397        // when being deserialized.
398        let m = Metadata::deserialize(&m.serialize()).unwrap();
399        assert!(Arc::ptr_eq(
400            &m.get(3).unwrap().extended_attributes.0,
401            &m.get(4).unwrap().extended_attributes.0
402        ));
403    }
404
405    #[test]
406    fn test_extended_attribute_serialization_backwards_compatibility() {
407        let xattrs = [(b"a", b"b"), (b"c", b"d"), (b"e", b"f")];
408        let btree_map: BTreeMap<Box<[u8]>, Box<[u8]>> =
409            xattrs.iter().map(|(&k, &v)| (k.into(), v.into())).collect();
410
411        let serialized = bincode::serialize(&btree_map).unwrap();
412
413        let extended_attributes: ExtendedAttributes = bincode::deserialize(&serialized).unwrap();
414        let expected_xattrs: ExtendedAttributes =
415            xattrs.iter().map(|(&k, &v)| (k.into(), v.into())).collect();
416        assert_eq!(extended_attributes, expected_xattrs);
417    }
418
419    #[test]
420    fn test_extended_attribute_serialization_forwards_compatibility() {
421        let xattrs = [(b"a", b"b"), (b"c", b"d"), (b"e", b"f")];
422        let extended_attributes: ExtendedAttributes =
423            xattrs.iter().map(|(&k, &v)| (k.into(), v.into())).collect();
424
425        let serialized = bincode::serialize(&extended_attributes).unwrap();
426
427        let btree_map: BTreeMap<Box<[u8]>, Box<[u8]>> = bincode::deserialize(&serialized).unwrap();
428        let expected_xattrs: BTreeMap<Box<[u8]>, Box<[u8]>> =
429            xattrs.iter().map(|(&k, &v)| (k.into(), v.into())).collect();
430        assert_eq!(btree_map, expected_xattrs);
431    }
432}