fxfs/object_store/journal/
super_block.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! We currently store two of these super-blocks (A/B) starting at offset 0 and 512kB.
6//!
7//! Immediately following the serialized `SuperBlockHeader` structure below is a stream of
8//! serialized operations that are replayed into the root parent `ObjectStore`. Note that the root
9//! parent object store exists entirely in RAM until serialized back into the super-block.
10//!
11//! Super-blocks are updated alternately with a monotonically increasing generation number.
12//! At mount time, the super-block used is the valid `SuperBlock` with the highest generation
13//! number.
14//!
15//! Note the asymmetry here regarding load/save:
16//!   * We load a superblock from a Device/SuperBlockInstance and return a
17//!     (SuperBlockHeader, ObjectStore) pair. The ObjectStore is populated directly from device.
18//!   * We save a superblock from a (SuperBlockHeader, Vec<ObjectItem>) pair to a WriteObjectHandle.
19//!
20//! This asymmetry is required for consistency.
21//! The Vec<ObjectItem> is produced by scanning the root_parent_store. This is the responsibility
22//! of the journal code, which must hold a lock to avoid concurrent updates. However, this lock
23//! must NOT be held when saving the superblock as additional extents may need to be allocated as
24//! part of the save process.
25use crate::errors::FxfsError;
26use crate::filesystem::{ApplyContext, ApplyMode, FxFilesystem, JournalingObject};
27use crate::log::*;
28use crate::lsm_tree::types::LayerIterator;
29use crate::lsm_tree::{LSMTree, LayerSet, Query};
30use crate::metrics;
31use crate::object_handle::ObjectHandle as _;
32use crate::object_store::allocator::Reservation;
33use crate::object_store::journal::bootstrap_handle::BootstrapObjectHandle;
34use crate::object_store::journal::reader::{JournalReader, ReadResult};
35use crate::object_store::journal::writer::JournalWriter;
36use crate::object_store::journal::{JournalCheckpoint, JournalCheckpointV32, BLOCK_SIZE};
37use crate::object_store::object_record::{
38    ObjectItem, ObjectItemV40, ObjectItemV41, ObjectItemV43, ObjectItemV46,
39};
40use crate::object_store::transaction::{AssocObj, Options};
41use crate::object_store::tree::MajorCompactable;
42use crate::object_store::{
43    DataObjectHandle, HandleOptions, HandleOwner, Mutation, ObjectKey, ObjectStore, ObjectValue,
44};
45use crate::range::RangeExt;
46use crate::serialized_types::{
47    migrate_to_version, Migrate, Version, Versioned, VersionedLatest, EARLIEST_SUPPORTED_VERSION,
48    FIRST_EXTENT_IN_SUPERBLOCK_VERSION, SMALL_SUPERBLOCK_VERSION,
49};
50use anyhow::{bail, ensure, Context, Error};
51use fprint::TypeFingerprint;
52use fuchsia_inspect::{Property as _, UintProperty};
53use fuchsia_sync::Mutex;
54use futures::FutureExt;
55use rustc_hash::FxHashMap as HashMap;
56use serde::{Deserialize, Serialize};
57use std::collections::VecDeque;
58use std::fmt;
59use std::io::{Read, Write};
60use std::ops::Range;
61use std::sync::Arc;
62use std::time::SystemTime;
63use storage_device::Device;
64use uuid::Uuid;
65
// Object ids of the two super-block objects (instances A and B).
// These only exist in the root store.
const SUPER_BLOCK_A_OBJECT_ID: u64 = 1;
const SUPER_BLOCK_B_OBJECT_ID: u64 = 2;

/// The superblock is extended in units of `SUPER_BLOCK_CHUNK_SIZE` (64 KiB) as required.
pub const SUPER_BLOCK_CHUNK_SIZE: u64 = 65536;

/// Each superblock is one block but may contain records that extend its own length.
/// This is the length in bytes (4 KiB) of the hard-coded first extent of each super-block.
const MIN_SUPER_BLOCK_SIZE: u64 = 4096;
/// The first 2 * 512 KiB on the disk used to be reserved for two A/B super-blocks.
/// This is the size in bytes (512 KiB) of each of those two legacy regions.
const LEGACY_MIN_SUPER_BLOCK_SIZE: u64 = 524_288;

/// All superblocks start with the magic bytes "FxfsSupr".
const SUPER_BLOCK_MAGIC: &[u8; 8] = b"FxfsSupr";
80
/// An enum representing one of our super-block instances.
///
/// This provides hard-coded constants related to the location and properties of the super-blocks
/// that are required to bootstrap the filesystem.
#[derive(Copy, Clone, Debug)]
pub enum SuperBlockInstance {
    /// The instance whose first extent starts at device offset 0.
    A,
    /// The instance whose first extent starts at device offset 512 KiB.
    B,
}
90
91impl SuperBlockInstance {
92    /// Returns the next [SuperBlockInstance] for use in round-robining writes across super-blocks.
93    pub fn next(&self) -> SuperBlockInstance {
94        match self {
95            SuperBlockInstance::A => SuperBlockInstance::B,
96            SuperBlockInstance::B => SuperBlockInstance::A,
97        }
98    }
99
100    pub fn object_id(&self) -> u64 {
101        match self {
102            SuperBlockInstance::A => SUPER_BLOCK_A_OBJECT_ID,
103            SuperBlockInstance::B => SUPER_BLOCK_B_OBJECT_ID,
104        }
105    }
106
107    /// Returns the byte range where the first extent of the [SuperBlockInstance] is stored.
108    /// (Note that a [SuperBlockInstance] may still have multiple extents.)
109    pub fn first_extent(&self) -> Range<u64> {
110        match self {
111            SuperBlockInstance::A => 0..MIN_SUPER_BLOCK_SIZE,
112            SuperBlockInstance::B => 524288..524288 + MIN_SUPER_BLOCK_SIZE,
113        }
114    }
115
116    /// We used to allocate 512kB to superblocks but this was almost always more than needed.
117    pub fn legacy_first_extent(&self) -> Range<u64> {
118        match self {
119            SuperBlockInstance::A => 0..LEGACY_MIN_SUPER_BLOCK_SIZE,
120            SuperBlockInstance::B => LEGACY_MIN_SUPER_BLOCK_SIZE..2 * LEGACY_MIN_SUPER_BLOCK_SIZE,
121        }
122    }
123}
124
// Alias for the current version of the super-block header.
pub type SuperBlockHeader = SuperBlockHeaderV32;

// The fixed header that is serialized immediately after the magic bytes of a super-block; a
// stream of `SuperBlockRecord`s follows it.
#[derive(
    Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned,
)]
pub struct SuperBlockHeaderV32 {
    /// The globally unique identifier for the filesystem.
    guid: UuidWrapperV32,

    /// There are two super-blocks which are used in an A/B configuration. The super-block with the
    /// greatest generation number is what is used when mounting an Fxfs image; the other is
    /// discarded.
    pub generation: u64,

    /// The root parent store is an in-memory only store and serves as the backing store for the
    /// root store and the journal.  The records for this store are serialized into the super-block
    /// and mutations are also recorded in the journal.
    pub root_parent_store_object_id: u64,

    /// The root parent needs a graveyard and there's nowhere else to store it other than in the
    /// super-block.
    pub root_parent_graveyard_directory_object_id: u64,

    /// The root object store contains all other metadata objects (including the allocator, the
    /// journal and the super-blocks) and is the parent for all other object stores.
    pub root_store_object_id: u64,

    /// This is in the root object store.
    pub allocator_object_id: u64,

    /// This is in the root parent object store.
    pub journal_object_id: u64,

    /// Start checkpoint for the journal file.
    pub journal_checkpoint: JournalCheckpointV32,

    /// Offset of the journal file when the super-block was written.  If no entry is present in
    /// journal_file_offsets for a particular object, then an object might have dependencies on the
    /// journal from super_block_journal_file_offset onwards, but not earlier.
    pub super_block_journal_file_offset: u64,

    /// object id -> journal file offset. Indicates where each object has been flushed to.
    pub journal_file_offsets: HashMap<u64, u64>,

    /// Records the amount of borrowed metadata space as applicable at
    /// `super_block_journal_file_offset`.
    pub borrowed_metadata_space: u64,

    /// The earliest version of Fxfs used to create any still-existing struct in the filesystem.
    ///
    /// Note: structs in the filesystem may have been made with various different versions of
    /// Fxfs.
    pub earliest_version: Version,
}
178
// Newtype around `Uuid` used for the super-block GUID. Wrapping lets this file supply its own
// `Debug`, `TypeFingerprint`, `Serialize` and `Deserialize` implementations for the value.
type UuidWrapper = UuidWrapperV32;
#[derive(Clone, Default, Eq, PartialEq)]
struct UuidWrapperV32(Uuid);
182
183impl UuidWrapper {
184    fn new() -> Self {
185        Self(Uuid::new_v4())
186    }
187    #[cfg(test)]
188    fn nil() -> Self {
189        Self(Uuid::nil())
190    }
191}
192
193impl fmt::Debug for UuidWrapper {
194    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
195        // The UUID uniquely identifies the filesystem, so we should redact it so that we don't leak
196        // it in logs.
197        f.write_str("<redacted>")
198    }
199}
200
201impl TypeFingerprint for UuidWrapper {
202    fn fingerprint() -> String {
203        "<[u8;16]>".to_owned()
204    }
205}
206
207// Uuid serializes like a slice, but SuperBlockHeader used to contain [u8; 16] and we want to remain
208// compatible.
209impl Serialize for UuidWrapper {
210    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
211        self.0.as_bytes().serialize(serializer)
212    }
213}
214
215impl<'de> Deserialize<'de> for UuidWrapper {
216    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
217        <[u8; 16]>::deserialize(deserializer).map(|bytes| UuidWrapperV32(Uuid::from_bytes(bytes)))
218    }
219}
220
// Alias for the current version of the records that follow the super-block header.
pub type SuperBlockRecord = SuperBlockRecordV46;

// One record in the stream that follows the serialized header in a super-block.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Serialize, Deserialize, TypeFingerprint, Versioned)]
pub enum SuperBlockRecordV46 {
    // When reading the super-block we know the initial extent, but not subsequent extents, so these
    // records need to exist to allow us to completely read the super-block.
    // The payload is a device byte range.
    Extent(Range<u64>),

    // Following the super-block header are ObjectItem records that are to be replayed into the root
    // parent object store.
    ObjectItem(ObjectItemV46),

    // Marks the end of the full super-block.
    End,
}
237
// Older serialized form of the super-block record carrying `ObjectItemV43` items. Declared to
// migrate to `SuperBlockRecordV46` via `migrate_to_version`.
#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV46)]
pub enum SuperBlockRecordV43 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV43),
    End,
}
246
// Older serialized form of the super-block record carrying `ObjectItemV41` items. Declared to
// migrate to `SuperBlockRecordV43` via `migrate_to_version`.
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV43)]
pub enum SuperBlockRecordV41 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV41),
    End,
}
254
// Older serialized form of the super-block record carrying `ObjectItemV40` items. Declared to
// migrate to `SuperBlockRecordV41` via `migrate_to_version`.
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV41)]
pub enum SuperBlockRecordV40 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV40),
    End,
}
262
/// Inspect properties that `SuperBlockManager::save` updates after each successful write.
struct SuperBlockMetrics {
    /// Time we wrote the most recent superblock in milliseconds since [`std::time::UNIX_EPOCH`].
    /// Uses [`std::time::SystemTime`] as the clock source.
    last_super_block_update_time_ms: UintProperty,

    /// Offset of the most recent superblock we wrote in the journal.
    last_super_block_offset: UintProperty,
}
271
272impl Default for SuperBlockMetrics {
273    fn default() -> Self {
274        SuperBlockMetrics {
275            last_super_block_update_time_ms: metrics::detail()
276                .create_uint("last_super_block_update_time_ms", 0),
277            last_super_block_offset: metrics::detail().create_uint("last_super_block_offset", 0),
278        }
279    }
280}
281
282/// Reads an individual (A/B) super-block instance and root_parent_store from device.
283/// Users should use SuperBlockManager::load() instead.
284async fn read(
285    device: Arc<dyn Device>,
286    block_size: u64,
287    instance: SuperBlockInstance,
288) -> Result<(SuperBlockHeader, SuperBlockInstance, ObjectStore), Error> {
289    let (super_block_header, mut reader) = SuperBlockHeader::read_header(device.clone(), instance)
290        .await
291        .context("failed to read superblock")?;
292    let root_parent = ObjectStore::new_root_parent(
293        device,
294        block_size,
295        super_block_header.root_parent_store_object_id,
296    );
297    root_parent.set_graveyard_directory_object_id(
298        super_block_header.root_parent_graveyard_directory_object_id,
299    );
300
301    loop {
302        // TODO: Flatten a layer and move reader here?
303        let (mutation, sequence) = match reader.next_item().await? {
304            // RecordReader should filter out extent records.
305            SuperBlockRecord::Extent(_) => bail!("Unexpected extent record"),
306            SuperBlockRecord::ObjectItem(item) => {
307                (Mutation::insert_object(item.key, item.value), item.sequence)
308            }
309            SuperBlockRecord::End => break,
310        };
311        root_parent.apply_mutation(
312            mutation,
313            &ApplyContext {
314                mode: ApplyMode::Replay,
315                checkpoint: JournalCheckpoint { file_offset: sequence, ..Default::default() },
316            },
317            AssocObj::None,
318        )?;
319    }
320    Ok((super_block_header, instance, root_parent))
321}
322
323/// Write a super-block to the given file handle.
324/// Requires that the filesystem is fully loaded and writable as this may require allocation.
325async fn write<S: HandleOwner>(
326    super_block_header: &SuperBlockHeader,
327    items: LayerSet<ObjectKey, ObjectValue>,
328    handle: DataObjectHandle<S>,
329) -> Result<(), Error> {
330    let object_manager = handle.store().filesystem().object_manager().clone();
331    // TODO(https://fxbug.dev/42177407): Don't use the same code here for Journal and SuperBlock. They
332    // aren't the same things and it is already getting convoluted. e.g of diff stream content:
333    //   Superblock:  (Magic, Ver, Header(Ver), Extent(Ver)*, SuperBlockRecord(Ver)*, ...)
334    //   Journal:     (Ver, JournalRecord(Ver)*, RESET, Ver2, JournalRecord(Ver2)*, ...)
335    // We should abstract away the checksum code and implement these separately.
336
337    let mut writer =
338        SuperBlockWriter::new(handle, super_block_header, object_manager.metadata_reservation())
339            .await?;
340    let mut merger = items.merger();
341    let mut iter = LSMTree::major_iter(merger.query(Query::FullScan).await?).await?;
342    while let Some(item) = iter.get() {
343        writer.write_root_parent_item(item.cloned()).await?;
344        iter.advance().await?;
345    }
346    writer.finalize().await
347}
348
349// Compacts and returns the *old* snapshot of the root_parent store.
350// Must be performed whilst holding a writer lock.
351pub fn compact_root_parent(
352    root_parent_store: &ObjectStore,
353) -> Result<LayerSet<ObjectKey, ObjectValue>, Error> {
354    // The root parent always uses in-memory layers which shouldn't be async, so we can use
355    // `now_or_never`.
356    let tree = root_parent_store.tree();
357    let layer_set = tree.layer_set();
358    {
359        let mut merger = layer_set.merger();
360        let mut iter = LSMTree::major_iter(merger.query(Query::FullScan).now_or_never().unwrap()?)
361            .now_or_never()
362            .unwrap()?;
363        let new_layer = LSMTree::new_mutable_layer();
364        while let Some(item_ref) = iter.get() {
365            new_layer.insert(item_ref.cloned())?;
366            iter.advance().now_or_never().unwrap()?;
367        }
368        tree.set_mutable_layer(new_layer);
369    }
370    Ok(layer_set)
371}
372
/// This encapsulates the A/B alternating super-block logic.
/// All super-block load/save operations should be via the methods on this type.
pub struct SuperBlockManager {
    /// The instance that the next call to `save` will write to.
    next_instance: Arc<Mutex<SuperBlockInstance>>,
    /// Inspect properties updated after each successful `save`.
    metrics: SuperBlockMetrics,
}
379
380impl SuperBlockManager {
381    pub fn new() -> Self {
382        Self {
383            next_instance: Arc::new(Mutex::new(SuperBlockInstance::A)),
384            metrics: Default::default(),
385        }
386    }
387
388    /// Loads both A/B super-blocks and root_parent ObjectStores and and returns the newest valid
389    /// pair. Also ensures the next superblock updated via |save| will be the other instance.
390    pub async fn load(
391        &self,
392        device: Arc<dyn Device>,
393        block_size: u64,
394    ) -> Result<(SuperBlockHeader, ObjectStore), Error> {
395        // Superblocks consume a minimum of one block. We currently hard code the length of
396        // this first extent. It should work with larger block sizes, but has not been tested.
397        // TODO(https://fxbug.dev/42063349): Consider relaxing this.
398        debug_assert!(MIN_SUPER_BLOCK_SIZE == block_size);
399
400        let (super_block, current_super_block, root_parent) = match futures::join!(
401            read(device.clone(), block_size, SuperBlockInstance::A),
402            read(device.clone(), block_size, SuperBlockInstance::B)
403        ) {
404            (Err(e1), Err(e2)) => {
405                bail!("Failed to load both superblocks due to {:?}\nand\n{:?}", e1, e2)
406            }
407            (Ok(result), Err(_)) => result,
408            (Err(_), Ok(result)) => result,
409            (Ok(result1), Ok(result2)) => {
410                // Break the tie by taking the super-block with the greatest generation.
411                if result2.0.generation > result1.0.generation {
412                    result2
413                } else {
414                    result1
415                }
416            }
417        };
418        info!(super_block:?, current_super_block:?; "loaded super-block");
419        *self.next_instance.lock() = current_super_block.next();
420        Ok((super_block, root_parent))
421    }
422
423    /// Writes the provided superblock and root_parent ObjectStore to the device.
424    /// Requires that the filesystem is fully loaded and writable as this may require allocation.
425    pub async fn save(
426        &self,
427        super_block_header: SuperBlockHeader,
428        filesystem: Arc<FxFilesystem>,
429        root_parent: LayerSet<ObjectKey, ObjectValue>,
430    ) -> Result<(), Error> {
431        let root_store = filesystem.root_store();
432        let object_id = {
433            let mut next_instance = self.next_instance.lock();
434            let object_id = next_instance.object_id();
435            *next_instance = next_instance.next();
436            object_id
437        };
438        let handle = ObjectStore::open_object(
439            &root_store,
440            object_id,
441            HandleOptions { skip_journal_checks: true, ..Default::default() },
442            None,
443        )
444        .await
445        .context("Failed to open superblock object")?;
446        write(&super_block_header, root_parent, handle).await?;
447        self.metrics
448            .last_super_block_offset
449            .set(super_block_header.super_block_journal_file_offset);
450        self.metrics.last_super_block_update_time_ms.set(
451            SystemTime::now()
452                .duration_since(SystemTime::UNIX_EPOCH)
453                .unwrap()
454                .as_millis()
455                .try_into()
456                .unwrap_or(0u64),
457        );
458        Ok(())
459    }
460}
461
462impl SuperBlockHeader {
463    /// Creates a new instance with random GUID.
464    pub fn new(
465        root_parent_store_object_id: u64,
466        root_parent_graveyard_directory_object_id: u64,
467        root_store_object_id: u64,
468        allocator_object_id: u64,
469        journal_object_id: u64,
470        journal_checkpoint: JournalCheckpoint,
471        earliest_version: Version,
472    ) -> Self {
473        SuperBlockHeader {
474            guid: UuidWrapper::new(),
475            generation: 1u64,
476            root_parent_store_object_id,
477            root_parent_graveyard_directory_object_id,
478            root_store_object_id,
479            allocator_object_id,
480            journal_object_id,
481            journal_checkpoint,
482            earliest_version,
483            ..Default::default()
484        }
485    }
486
487    /// Read the super-block header, and return it and a reader that produces the records that are
488    /// to be replayed in to the root parent object store.
489    async fn read_header(
490        device: Arc<dyn Device>,
491        target_super_block: SuperBlockInstance,
492    ) -> Result<(SuperBlockHeader, RecordReader), Error> {
493        let handle = BootstrapObjectHandle::new(
494            target_super_block.object_id(),
495            device,
496            target_super_block.first_extent(),
497        );
498        let mut reader = JournalReader::new(handle, &JournalCheckpoint::default());
499        reader.set_eof_ok();
500
501        reader.fill_buf().await?;
502
503        let mut super_block_header;
504        let super_block_version;
505        reader.consume({
506            let mut cursor = std::io::Cursor::new(reader.buffer());
507            // Validate magic bytes.
508            let mut magic_bytes: [u8; 8] = [0; 8];
509            cursor.read_exact(&mut magic_bytes)?;
510            if magic_bytes.as_slice() != SUPER_BLOCK_MAGIC.as_slice() {
511                bail!("Invalid magic: {:?}", magic_bytes);
512            }
513            (super_block_header, super_block_version) =
514                SuperBlockHeader::deserialize_with_version(&mut cursor)?;
515
516            if super_block_version < EARLIEST_SUPPORTED_VERSION {
517                bail!("Unsupported SuperBlock version: {:?}", super_block_version);
518            }
519
520            // NOTE: It is possible that data was written to the journal with an old version
521            // but no compaction ever happened, so the journal version could potentially be older
522            // than the layer file versions.
523            if super_block_header.journal_checkpoint.version < EARLIEST_SUPPORTED_VERSION {
524                bail!(
525                    "Unsupported JournalCheckpoint version: {:?}",
526                    super_block_header.journal_checkpoint.version
527                );
528            }
529
530            if super_block_header.earliest_version < EARLIEST_SUPPORTED_VERSION {
531                bail!(
532                    "Filesystem contains struct with unsupported version: {:?}",
533                    super_block_header.earliest_version
534                );
535            }
536
537            cursor.position() as usize
538        });
539
540        // From version 45 superblocks describe their own extents (a noop here).
541        // At version 44, superblocks assume a 4kb first extent.
542        // Prior to version 44, superblocks assume a 512kb first extent.
543        if super_block_version < SMALL_SUPERBLOCK_VERSION {
544            reader.handle().push_extent(0, target_super_block.legacy_first_extent());
545        } else if super_block_version < FIRST_EXTENT_IN_SUPERBLOCK_VERSION {
546            reader.handle().push_extent(0, target_super_block.first_extent())
547        }
548
549        // If guid is zeroed (e.g. in a newly imaged system), assign one randomly.
550        if super_block_header.guid.0.is_nil() {
551            super_block_header.guid = UuidWrapper::new();
552        }
553        reader.set_version(super_block_version);
554        Ok((super_block_header, RecordReader { reader }))
555    }
556}
557
/// Serializes a super-block (magic, header, extent and object-item records) to an object
/// handle, extending the object as needed.
struct SuperBlockWriter<'a, S: HandleOwner> {
    /// Handle to the super-block object being written.
    handle: DataObjectHandle<S>,
    /// Buffers the serialized stream until it is flushed in `finalize`.
    writer: JournalWriter,
    /// (file offset, device range) pairs the object already had when the writer was created and
    /// that have not yet been emitted as extent records.
    existing_extents: VecDeque<(u64, Range<u64>)>,
    /// Number of bytes covered by the extents recorded so far.
    size: u64,
    /// Passed as the allocator reservation when more space has to be allocated.
    reservation: &'a Reservation,
}
565
566impl<'a, S: HandleOwner> SuperBlockWriter<'a, S> {
567    /// Create a new writer, outputs FXFS magic, version and SuperBlockHeader.
568    /// On success, the writer is ready to accept root parent store mutations.
569    pub async fn new(
570        handle: DataObjectHandle<S>,
571        super_block_header: &SuperBlockHeader,
572        reservation: &'a Reservation,
573    ) -> Result<Self, Error> {
574        let existing_extents = handle.device_extents().await?;
575        let mut this = Self {
576            handle,
577            writer: JournalWriter::new(BLOCK_SIZE as usize, 0),
578            existing_extents: existing_extents.into_iter().collect(),
579            size: 0,
580            reservation,
581        };
582        this.writer.write_all(SUPER_BLOCK_MAGIC)?;
583        super_block_header.serialize_with_version(&mut this.writer)?;
584        Ok(this)
585    }
586
587    /// Internal helper function to pull ranges from a list of existing extents and tack
588    /// corresponding extent records onto the journal.
589    fn try_extend_existing(&mut self, target_size: u64) -> Result<(), Error> {
590        while self.size < target_size {
591            if let Some((offset, range)) = self.existing_extents.pop_front() {
592                ensure!(offset == self.size, "superblock file contains a hole.");
593                self.size += range.end - range.start;
594                SuperBlockRecord::Extent(range).serialize_into(&mut self.writer)?;
595            } else {
596                break;
597            }
598        }
599        Ok(())
600    }
601
602    pub async fn write_root_parent_item(&mut self, record: ObjectItem) -> Result<(), Error> {
603        let min_len = self.writer.journal_file_checkpoint().file_offset + SUPER_BLOCK_CHUNK_SIZE;
604        self.try_extend_existing(min_len)?;
605        if min_len > self.size {
606            // Need to allocate some more space.
607            let mut transaction = self
608                .handle
609                .new_transaction_with_options(Options {
610                    skip_journal_checks: true,
611                    borrow_metadata_space: true,
612                    allocator_reservation: Some(self.reservation),
613                    ..Default::default()
614                })
615                .await?;
616            let mut file_range = self.size..self.size + SUPER_BLOCK_CHUNK_SIZE;
617            let allocated = self
618                .handle
619                .preallocate_range(&mut transaction, &mut file_range)
620                .await
621                .context("preallocate superblock")?;
622            if file_range.start < file_range.end {
623                bail!("preallocate_range returned too little space");
624            }
625            transaction.commit().await?;
626            for device_range in allocated {
627                self.size += device_range.end - device_range.start;
628                SuperBlockRecord::Extent(device_range).serialize_into(&mut self.writer)?;
629            }
630        }
631        SuperBlockRecord::ObjectItem(record).serialize_into(&mut self.writer)?;
632        Ok(())
633    }
634
635    pub async fn finalize(mut self) -> Result<(), Error> {
636        SuperBlockRecord::End.serialize_into(&mut self.writer)?;
637        self.writer.pad_to_block()?;
638        let mut buf = self.handle.allocate_buffer(self.writer.flushable_bytes()).await;
639        let offset = self.writer.take_flushable(buf.as_mut());
640        self.handle.overwrite(offset, buf.as_mut(), false).await?;
641        let len =
642            std::cmp::max(MIN_SUPER_BLOCK_SIZE, self.writer.journal_file_checkpoint().file_offset)
643                + SUPER_BLOCK_CHUNK_SIZE;
644        self.handle
645            .truncate_with_options(
646                Options {
647                    skip_journal_checks: true,
648                    borrow_metadata_space: true,
649                    ..Default::default()
650                },
651                len,
652            )
653            .await?;
654        Ok(())
655    }
656}
657
/// Yields the records that follow the header of a super-block; extent records are consumed
/// internally by `next_item` rather than returned.
pub struct RecordReader {
    /// The underlying reader, positioned after the serialized header.
    reader: JournalReader,
}
661
662impl RecordReader {
663    pub async fn next_item(&mut self) -> Result<SuperBlockRecord, Error> {
664        loop {
665            match self.reader.deserialize().await? {
666                ReadResult::Reset(_) => bail!("Unexpected reset"),
667                ReadResult::ChecksumMismatch => bail!("Checksum mismatch"),
668                ReadResult::Some(SuperBlockRecord::Extent(extent)) => {
669                    ensure!(extent.is_valid(), FxfsError::Inconsistent);
670                    self.reader.handle().push_extent(0, extent)
671                }
672                ReadResult::Some(x) => return Ok(x),
673            }
674        }
675    }
676}
677
678#[cfg(test)]
679mod tests {
680    use super::{
681        compact_root_parent, write, SuperBlockHeader, SuperBlockInstance, UuidWrapper,
682        MIN_SUPER_BLOCK_SIZE, SUPER_BLOCK_CHUNK_SIZE,
683    };
684    use crate::filesystem::{FxFilesystem, OpenFxFilesystem};
685    use crate::object_handle::ReadObjectHandle;
686    use crate::object_store::journal::JournalCheckpoint;
687    use crate::object_store::transaction::{lock_keys, Options};
688    use crate::object_store::{
689        DataObjectHandle, HandleOptions, ObjectHandle, ObjectKey, ObjectStore,
690    };
691    use crate::serialized_types::LATEST_VERSION;
692    use storage_device::fake_device::FakeDevice;
693    use storage_device::DeviceHolder;
694
695    // We require 512kiB each for A/B super-blocks, 256kiB for the journal (128kiB before flush)
696    // and compactions require double the layer size to complete.
697    const TEST_DEVICE_BLOCK_SIZE: u32 = 512;
698    const TEST_DEVICE_BLOCK_COUNT: u64 = 16384;
699
700    async fn filesystem_and_super_block_handles(
701    ) -> (OpenFxFilesystem, DataObjectHandle<ObjectStore>, DataObjectHandle<ObjectStore>) {
702        let device =
703            DeviceHolder::new(FakeDevice::new(TEST_DEVICE_BLOCK_COUNT, TEST_DEVICE_BLOCK_SIZE));
704        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
705        fs.close().await.expect("Close failed");
706        let device = fs.take_device().await;
707        device.reopen(false);
708        let fs = FxFilesystem::open(device).await.expect("open failed");
709
710        let handle_a = ObjectStore::open_object(
711            &fs.object_manager().root_store(),
712            SuperBlockInstance::A.object_id(),
713            HandleOptions::default(),
714            None,
715        )
716        .await
717        .expect("open superblock failed");
718
719        let handle_b = ObjectStore::open_object(
720            &fs.object_manager().root_store(),
721            SuperBlockInstance::B.object_id(),
722            HandleOptions::default(),
723            None,
724        )
725        .await
726        .expect("open superblock failed");
727        (fs, handle_a, handle_b)
728    }
729
730    #[fuchsia::test]
731    async fn test_read_written_super_block() {
732        let (fs, _handle_a, _handle_b) = filesystem_and_super_block_handles().await;
733        const JOURNAL_OBJECT_ID: u64 = 5;
734
735        // Confirm that the (first) super-block is expected size.
736        // It should be MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE.
737        assert_eq!(
738            ObjectStore::open_object(
739                &fs.root_store(),
740                SuperBlockInstance::A.object_id(),
741                HandleOptions::default(),
742                None,
743            )
744            .await
745            .expect("open_object failed")
746            .get_size(),
747            MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
748        );
749
750        // Create a large number of objects in the root parent store so that we test growing
751        // of the super-block file, requiring us to add extents.
752        let mut created_object_ids = vec![];
753        const NUM_ENTRIES: u64 = 16384;
754        for _ in 0..NUM_ENTRIES {
755            let mut transaction = fs
756                .clone()
757                .new_transaction(lock_keys![], Options::default())
758                .await
759                .expect("new_transaction failed");
760            created_object_ids.push(
761                ObjectStore::create_object(
762                    &fs.object_manager().root_parent_store(),
763                    &mut transaction,
764                    HandleOptions::default(),
765                    None,
766                )
767                .await
768                .expect("create_object failed")
769                .object_id(),
770            );
771            transaction.commit().await.expect("commit failed");
772        }
773
774        // Note here that DataObjectHandle caches the size given to it at construction.
775        // If we want to know the true size after a super-block has been written, we need
776        // a new handle.
777        assert!(
778            ObjectStore::open_object(
779                &fs.root_store(),
780                SuperBlockInstance::A.object_id(),
781                HandleOptions::default(),
782                None,
783            )
784            .await
785            .expect("open_object failed")
786            .get_size()
787                > MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
788        );
789
790        let written_super_block_a =
791            SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::A)
792                .await
793                .expect("read failed");
794        let written_super_block_b =
795            SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::B)
796                .await
797                .expect("read failed");
798
799        // Check that a non-zero GUID has been assigned.
800        assert!(!written_super_block_a.0.guid.0.is_nil());
801
802        // Depending on specific offsets is fragile so we just validate the fields we believe
803        // to be stable.
804        assert_eq!(written_super_block_a.0.guid, written_super_block_b.0.guid);
805        assert_eq!(written_super_block_a.0.guid, written_super_block_b.0.guid);
806        assert!(written_super_block_a.0.generation != written_super_block_b.0.generation);
807        assert_eq!(
808            written_super_block_a.0.root_parent_store_object_id,
809            written_super_block_b.0.root_parent_store_object_id
810        );
811        assert_eq!(
812            written_super_block_a.0.root_parent_graveyard_directory_object_id,
813            written_super_block_b.0.root_parent_graveyard_directory_object_id
814        );
815        assert_eq!(written_super_block_a.0.root_store_object_id, fs.root_store().store_object_id());
816        assert_eq!(
817            written_super_block_a.0.root_store_object_id,
818            written_super_block_b.0.root_store_object_id
819        );
820        assert_eq!(written_super_block_a.0.allocator_object_id, fs.allocator().object_id());
821        assert_eq!(
822            written_super_block_a.0.allocator_object_id,
823            written_super_block_b.0.allocator_object_id
824        );
825        assert_eq!(written_super_block_a.0.journal_object_id, JOURNAL_OBJECT_ID);
826        assert_eq!(
827            written_super_block_a.0.journal_object_id,
828            written_super_block_b.0.journal_object_id
829        );
830        assert!(
831            written_super_block_a.0.journal_checkpoint.file_offset
832                != written_super_block_b.0.journal_checkpoint.file_offset
833        );
834        assert!(
835            written_super_block_a.0.super_block_journal_file_offset
836                != written_super_block_b.0.super_block_journal_file_offset
837        );
838        // Nb: We skip journal_file_offsets and borrowed metadata space checks.
839        assert_eq!(written_super_block_a.0.earliest_version, LATEST_VERSION);
840        assert_eq!(
841            written_super_block_a.0.earliest_version,
842            written_super_block_b.0.earliest_version
843        );
844
845        // Nb: Skip comparison of root_parent store contents because we have no way of anticipating
846        // the extent offsets and it is reasonable that a/b differ.
847
848        // Delete all the objects we just made.
849        for object_id in created_object_ids {
850            let mut transaction = fs
851                .clone()
852                .new_transaction(lock_keys![], Options::default())
853                .await
854                .expect("new_transaction failed");
855            fs.object_manager()
856                .root_parent_store()
857                .adjust_refs(&mut transaction, object_id, -1)
858                .await
859                .expect("adjust_refs failed");
860            transaction.commit().await.expect("commit failed");
861            fs.object_manager()
862                .root_parent_store()
863                .tombstone_object(object_id, Options::default())
864                .await
865                .expect("tombstone failed");
866        }
867        // Write some stuff to the root store to ensure we rotate the journal and produce new
868        // super blocks.
869        for _ in 0..NUM_ENTRIES {
870            let mut transaction = fs
871                .clone()
872                .new_transaction(lock_keys![], Options::default())
873                .await
874                .expect("new_transaction failed");
875            ObjectStore::create_object(
876                &fs.object_manager().root_store(),
877                &mut transaction,
878                HandleOptions::default(),
879                None,
880            )
881            .await
882            .expect("create_object failed");
883            transaction.commit().await.expect("commit failed");
884        }
885
886        assert_eq!(
887            ObjectStore::open_object(
888                &fs.root_store(),
889                SuperBlockInstance::A.object_id(),
890                HandleOptions::default(),
891                None,
892            )
893            .await
894            .expect("open_object failed")
895            .get_size(),
896            MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
897        );
898    }
899
900    #[fuchsia::test]
901    async fn test_guid_assign_on_read() {
902        let (fs, handle_a, _handle_b) = filesystem_and_super_block_handles().await;
903        const JOURNAL_OBJECT_ID: u64 = 5;
904        let mut super_block_header_a = SuperBlockHeader::new(
905            fs.object_manager().root_parent_store().store_object_id(),
906            /* root_parent_graveyard_directory_object_id: */ 1000,
907            fs.root_store().store_object_id(),
908            fs.allocator().object_id(),
909            JOURNAL_OBJECT_ID,
910            JournalCheckpoint { file_offset: 1234, checksum: 5678, version: LATEST_VERSION },
911            /* earliest_version: */ LATEST_VERSION,
912        );
913        // Ensure the superblock has no set GUID.
914        super_block_header_a.guid = UuidWrapper::nil();
915        write(
916            &super_block_header_a,
917            compact_root_parent(fs.object_manager().root_parent_store().as_ref())
918                .expect("scan failed"),
919            handle_a,
920        )
921        .await
922        .expect("write failed");
923        let super_block_header = SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::A)
924            .await
925            .expect("read failed");
926        // Ensure a GUID has been assigned.
927        assert!(!super_block_header.0.guid.0.is_nil());
928    }
929
930    #[fuchsia::test]
931    async fn test_init_wipes_superblocks() {
932        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));
933
934        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
935        let root_store = fs.root_store();
936        // Generate enough work to induce a journal flush and thus a new superblock being written.
937        for _ in 0..6000 {
938            let mut transaction = fs
939                .clone()
940                .new_transaction(lock_keys![], Options::default())
941                .await
942                .expect("new_transaction failed");
943            ObjectStore::create_object(
944                &root_store,
945                &mut transaction,
946                HandleOptions::default(),
947                None,
948            )
949            .await
950            .expect("create_object failed");
951            transaction.commit().await.expect("commit failed");
952        }
953        fs.close().await.expect("Close failed");
954        let device = fs.take_device().await;
955        device.reopen(false);
956
957        SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
958            .await
959            .expect("read failed");
960        let header = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
961            .await
962            .expect("read failed");
963
964        let old_guid = header.0.guid;
965
966        // Re-initialize the filesystem.  The A and B blocks should be for the new FS.
967        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
968        fs.close().await.expect("Close failed");
969        let device = fs.take_device().await;
970        device.reopen(false);
971
972        let a = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
973            .await
974            .expect("read failed");
975        let b = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
976            .await
977            .expect("read failed");
978
979        assert_eq!(a.0.guid, b.0.guid);
980        assert_ne!(old_guid, a.0.guid);
981    }
982
983    #[fuchsia::test]
984    async fn test_alternating_super_blocks() {
985        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));
986
987        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
988        fs.close().await.expect("Close failed");
989        let device = fs.take_device().await;
990        device.reopen(false);
991
992        let (super_block_header_a, _) =
993            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
994                .await
995                .expect("read failed");
996
997        // The second super-block won't be valid at this time so there's no point reading it.
998
999        let fs = FxFilesystem::open(device).await.expect("open failed");
1000        let root_store = fs.root_store();
1001        // Generate enough work to induce a journal flush.
1002        for _ in 0..6000 {
1003            let mut transaction = fs
1004                .clone()
1005                .new_transaction(lock_keys![], Options::default())
1006                .await
1007                .expect("new_transaction failed");
1008            ObjectStore::create_object(
1009                &root_store,
1010                &mut transaction,
1011                HandleOptions::default(),
1012                None,
1013            )
1014            .await
1015            .expect("create_object failed");
1016            transaction.commit().await.expect("commit failed");
1017        }
1018        fs.close().await.expect("Close failed");
1019        let device = fs.take_device().await;
1020        device.reopen(false);
1021
1022        let (super_block_header_a_after, _) =
1023            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
1024                .await
1025                .expect("read failed");
1026        let (super_block_header_b_after, _) =
1027            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
1028                .await
1029                .expect("read failed");
1030
1031        // It's possible that multiple super-blocks were written, so cater for that.
1032
1033        // The sequence numbers should be one apart.
1034        assert_eq!(
1035            (super_block_header_b_after.generation as i64
1036                - super_block_header_a_after.generation as i64)
1037                .abs(),
1038            1
1039        );
1040
1041        // At least one super-block should have been written.
1042        assert!(
1043            std::cmp::max(
1044                super_block_header_a_after.generation,
1045                super_block_header_b_after.generation
1046            ) > super_block_header_a.generation
1047        );
1048
1049        // They should have the same oddness.
1050        assert_eq!(super_block_header_a_after.generation & 1, super_block_header_a.generation & 1);
1051    }
1052
1053    #[fuchsia::test]
1054    async fn test_root_parent_is_compacted() {
1055        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));
1056
1057        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
1058
1059        let mut transaction = fs
1060            .clone()
1061            .new_transaction(lock_keys![], Options::default())
1062            .await
1063            .expect("new_transaction failed");
1064        let store = fs.root_parent_store();
1065        let handle =
1066            ObjectStore::create_object(&store, &mut transaction, HandleOptions::default(), None)
1067                .await
1068                .expect("create_object failed");
1069        transaction.commit().await.expect("commit failed");
1070
1071        store
1072            .tombstone_object(handle.object_id(), Options::default())
1073            .await
1074            .expect("tombstone failed");
1075
1076        // Generate enough work to induce a journal flush.
1077        let root_store = fs.root_store();
1078        for _ in 0..6000 {
1079            let mut transaction = fs
1080                .clone()
1081                .new_transaction(lock_keys![], Options::default())
1082                .await
1083                .expect("new_transaction failed");
1084            ObjectStore::create_object(
1085                &root_store,
1086                &mut transaction,
1087                HandleOptions::default(),
1088                None,
1089            )
1090            .await
1091            .expect("create_object failed");
1092            transaction.commit().await.expect("commit failed");
1093        }
1094
1095        // The root parent store should have been compacted, so we shouldn't be able to find any
1096        // record referring to the object we tombstoned.
1097        assert_eq!(
1098            store.tree().find(&ObjectKey::object(handle.object_id())).await.expect("find failed"),
1099            None
1100        );
1101    }
1102}