fuchsia_inspect_contrib/
self_profile.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Utility for profiling nested/hierarchical durations and reporting their accumulated runtime
6//! in inspect. See caveats below before using.
7//!
8//! # Caveats
9//!
10//! Profiling data collected by this module has some runtime overhead depending on the level of
11//! nesting and frequency of entrances and exits. Do not call `start_self_profiling` in production.
12//!
13//! The method this tool uses to collect nested durations' runtimes prevents them from perfectly
14//! summing to 100% of the parent duration's runtime, even when the code is completely covered by
15//! adjoining durations. As a rule of thumb, the error should be limited to a few percent of the
16//! parent's runtime so if you see >10% unaccounted runtime you may need to add more durations.
17//!
18//! Runtimes are accumulated with atomics rather than with a single lock per duration to reduce
19//! error accumulation from lock contention, but this means that a given inspect snapshot may
20//! observe partially written results. This means that the tool is best suited for measuring
21//! workloads where durations occur at a high frequency (and as a result may be a poor fit for
22//! tracing) or where the timing of the inspect snapshot can be run after the profiled code has
23//! exited all durations of interest.
24//!
25//! This module does not yet integrate well with `async`/`.await` workloads.
26//!
27//! # Setup
28//!
29//! There are three steps required to collect profiling data with this module:
30//!
31//! 1. register a lazy node at the top of your component's inspect hierarchies using
32//!    `ProfileDuration::lazy_node_callback()`
33//! 2. call `start_self_profiling()`
34//! 3. invoke `profile_duration!("...");` at the beginning of scopes of interest
35//!
36//! # Analysis
37//!
38//! To see a human-readable summary of the captured profiles, take a snapshot of your component's
39//! inspect and pass the results to `self_profiles_report::SelfProfilesReport`.
40
41use fuchsia_inspect::{Inspector, Node};
42use fuchsia_sync::Mutex;
43use std::cell::RefCell;
44use std::collections::BTreeMap;
45use std::future::Future;
46use std::panic::Location;
47use std::pin::Pin;
48use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering};
49use std::sync::{Arc, LazyLock};
50use zx::{self as zx, Task as _};
51
52static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
53
54static ROOT_PROFILE_DURATION: LazyLock<Arc<ProfileDurationTree>> =
55    LazyLock::new(|| Arc::new(ProfileDurationTree::new(Location::caller())));
56
57thread_local! {
58    static CURRENT_PROFILE_DURATION: RefCell<Arc<ProfileDurationTree>> =
59        RefCell::new(ROOT_PROFILE_DURATION.clone());
60}
61
62/// Profile the remainder of the invoking lexical scope under the provided name.
63///
64/// If you need to split a single lexical scope into multiple durations, consider using
65/// `ProfileDuration::enter` and `ProfileDuration::pivot`.
66#[macro_export]
67macro_rules! profile_duration {
68    ($name:expr) => {
69        let _guard = $crate::ProfileDuration::enter($name);
70    };
71}
72
73/// Start collecting profiling information for any durations entered after this call is made.
74pub fn start_self_profiling() {
75    PROFILING_ENABLED.store(true, Ordering::Relaxed);
76}
77
78/// Stop collecting profiling information for any durations entered after this call is made.
79pub fn stop_self_profiling() {
80    PROFILING_ENABLED.store(false, Ordering::Relaxed);
81}
82
83#[inline]
84fn profiling_enabled() -> bool {
85    PROFILING_ENABLED.load(Ordering::Relaxed)
86}
87
88/// A guard value to manually control entry/exit of profile durations.
89pub struct ProfileDuration {
90    inner: Option<InnerGuard>,
91}
92
93impl ProfileDuration {
94    /// Function to be passed to `fuchsia_inspect::Node::record_lazy_child` to record profiling data.
95    pub fn lazy_node_callback(
96    ) -> Pin<Box<dyn Future<Output = Result<Inspector, anyhow::Error>> + Send + 'static>> {
97        Box::pin(async move {
98            let inspector = Inspector::default();
99            ROOT_PROFILE_DURATION.report(inspector.root());
100
101            // Make sure the analysis tooling can find this node regardless of the user-defined name.
102            inspector.root().record_bool("__profile_durations_root", true);
103            Ok(inspector)
104        })
105    }
106
107    /// Manually enter a profile duration. The duration will be exited when the returned value is
108    /// dropped.
109    ///
110    /// If using a single variable binding to hold guards for multiple durations, call
111    /// `ProfileDuration::pivot` instead of re-assigning to the binding.
112    #[track_caller]
113    pub fn enter(name: &'static str) -> Self {
114        Self { inner: if profiling_enabled() { Some(InnerGuard::enter(name)) } else { None } }
115    }
116
117    /// End the current duration and enter a new one, re-using the same guard value. Allows
118    /// splitting a single lexical scope into multiple durations.
119    #[track_caller]
120    pub fn pivot(&mut self, name: &'static str) {
121        if profiling_enabled() {
122            drop(self.inner.take());
123            self.inner = Some(InnerGuard::enter(name));
124        }
125    }
126}
127
128struct InnerGuard {
129    start_runtime: zx::TaskRuntimeInfo,
130    start_monotonic_ns: zx::MonotonicInstant,
131    parent_duration: Arc<ProfileDurationTree>,
132}
133
134impl InnerGuard {
135    #[track_caller]
136    fn enter(name: &'static str) -> Self {
137        let start_monotonic_ns = zx::MonotonicInstant::get();
138        let start_runtime = current_thread_runtime();
139
140        // Get the location outside the below closure since it can't be track_caller on stable.
141        let location = Location::caller();
142        let parent_duration = CURRENT_PROFILE_DURATION.with(|current_duration| {
143            let mut current_duration = current_duration.borrow_mut();
144            let child_duration = current_duration.child(name, location);
145            std::mem::replace(&mut *current_duration, child_duration)
146        });
147
148        Self { start_runtime, start_monotonic_ns, parent_duration }
149    }
150}
151
152impl Drop for InnerGuard {
153    fn drop(&mut self) {
154        CURRENT_PROFILE_DURATION.with(|current_duration| {
155            let mut current_duration = current_duration.borrow_mut();
156            let completed_duration =
157                std::mem::replace(&mut *current_duration, self.parent_duration.clone());
158
159            let runtime_delta = current_thread_runtime() - self.start_runtime;
160            let wall_time_delta = zx::MonotonicInstant::get() - self.start_monotonic_ns;
161
162            completed_duration.count.fetch_add(1, Ordering::Relaxed);
163            completed_duration.wall_time.fetch_add(wall_time_delta.into_nanos(), Ordering::Relaxed);
164            completed_duration.cpu_time.fetch_add(runtime_delta.cpu_time, Ordering::Relaxed);
165            completed_duration.queue_time.fetch_add(runtime_delta.queue_time, Ordering::Relaxed);
166            completed_duration
167                .page_fault_time
168                .fetch_add(runtime_delta.page_fault_time, Ordering::Relaxed);
169            completed_duration
170                .lock_contention_time
171                .fetch_add(runtime_delta.lock_contention_time, Ordering::Relaxed);
172        });
173    }
174}
175
176#[derive(Debug)]
177struct ProfileDurationTree {
178    location: &'static Location<'static>,
179    count: AtomicU64,
180    wall_time: AtomicI64,
181    cpu_time: AtomicI64,
182    queue_time: AtomicI64,
183    page_fault_time: AtomicI64,
184    lock_contention_time: AtomicI64,
185    children: Mutex<BTreeMap<&'static str, Arc<Self>>>,
186}
187
188impl ProfileDurationTree {
189    fn new(location: &'static Location<'static>) -> Self {
190        Self {
191            location,
192            count: Default::default(),
193            wall_time: Default::default(),
194            cpu_time: Default::default(),
195            queue_time: Default::default(),
196            page_fault_time: Default::default(),
197            lock_contention_time: Default::default(),
198            children: Default::default(),
199        }
200    }
201
202    fn child(&self, name: &'static str, location: &'static Location<'static>) -> Arc<Self> {
203        self.children.lock().entry(name).or_insert_with(|| Arc::new(Self::new(location))).clone()
204    }
205
206    fn report(&self, node: &Node) {
207        let children = self.children.lock();
208        node.record_string("location", self.location.to_string());
209        node.record_uint("count", self.count.load(Ordering::Relaxed));
210        node.record_int("wall_time", self.wall_time.load(Ordering::Relaxed));
211        node.record_int("cpu_time", self.cpu_time.load(Ordering::Relaxed));
212        node.record_int("queue_time", self.queue_time.load(Ordering::Relaxed));
213        node.record_int("page_fault_time", self.page_fault_time.load(Ordering::Relaxed));
214        node.record_int("lock_contention_time", self.lock_contention_time.load(Ordering::Relaxed));
215        for (name, child) in children.iter() {
216            node.record_child(*name, |n| child.report(n));
217        }
218    }
219}
220
221fn current_thread_runtime() -> zx::TaskRuntimeInfo {
222    fuchsia_runtime::thread_self()
223        .get_runtime_info()
224        .expect("should always be able to read own thread's runtime info")
225}