// wlan_telemetry/lib.rs
1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4use anyhow::{Context as _, Error, format_err};
5use fidl_fuchsia_power_battery as fidl_battery;
6use fidl_fuchsia_wlan_ieee80211 as fidl_ieee80211;
7use fidl_fuchsia_wlan_internal as fidl_internal;
8use fuchsia_async as fasync;
9use fuchsia_inspect::Node as InspectNode;
10use futures::channel::mpsc;
11use futures::{Future, StreamExt, select};
12use log::error;
13use std::boxed::Box;
14use windowed_stats::experimental::inspect::TimeMatrixClient;
15use wlan_common::bss::BssDescription;
16use wlan_legacy_metrics_registry as metrics;
17
18mod processors;
19pub(crate) mod util;
20pub use crate::processors::connect_disconnect::DisconnectInfo;
21pub use crate::processors::power::{IfacePowerLevel, UnclearPowerDemand};
22pub use crate::processors::scan::ScanResult;
23pub use crate::processors::toggle_events::ClientConnectionsToggleEvent;
24pub use util::sender::TelemetrySender;
25#[cfg(test)]
26mod testing;
27
/// Events consumed by the telemetry serving loop (see `serve_telemetry`).
/// Producers submit these through a `TelemetrySender`.
#[derive(Debug)]
pub enum TelemetryEvent {
    /// A connect attempt finished with the given status code for the given BSS.
    ConnectResult {
        result: fidl_ieee80211::StatusCode,
        bss: Box<BssDescription>,
        is_credential_rejected: bool,
    },
    /// An existing connection was terminated.
    Disconnect {
        info: DisconnectInfo,
    },
    /// Client connections enabled or disabled
    ClientConnectionsToggle {
        event: ClientConnectionsToggleEvent,
    },
    /// A client iface with the given id was created.
    ClientIfaceCreated {
        iface_id: u16,
    },
    /// A client iface with the given id was destroyed.
    ClientIfaceDestroyed {
        iface_id: u16,
    },
    /// An attempt to create an iface failed.
    IfaceCreationFailure,
    /// An attempt to destroy an iface failed.
    IfaceDestructionFailure,
    /// A scan was started.
    ScanStart,
    /// A scan completed with the given result.
    ScanResult {
        result: ScanResult,
    },
    /// The power level of the given iface changed.
    IfacePowerLevelChanged {
        iface_power_level: IfacePowerLevel,
        iface_id: u16,
    },
    /// System suspension imminent
    SuspendImminent,
    /// Unclear power level requested by policy layer
    UnclearPowerDemand(UnclearPowerDemand),
    /// The battery charge status changed.
    BatteryChargeStatus(fidl_battery::ChargeStatus),
    /// A recovery attempt completed; `Ok(())` on success.
    RecoveryEvent {
        result: Result<(), ()>,
    },
    /// An SME request timed out.
    SmeTimeout,
    /// Powering the chip up failed.
    ChipPowerUpFailure,
    /// Powering the chip down failed.
    ChipPowerDownFailure,
    /// Reset the TX power scenario to its default.
    ResetTxPowerScenario,
    /// Apply the given TX power scenario.
    SetTxPowerScenario {
        scenario: fidl_internal::TxPowerScenario,
    },
}
75
/// If metrics cannot be reported for extended periods of time, logging new metrics will fail and
/// the error messages tend to clutter up the logs.  This container limits the rate at which such
/// potentially noisy logs are reported.  Duplicate error messages are aggregated and periodically
/// reported.
pub struct ThrottledErrorLogger {
    // Time when an error was last logged (not suppressed). Initialized to time 0 in `new` so
    // the throttle window is considered elapsed on first use.
    time_of_last_log: fasync::MonotonicInstant,
    // Count of each suppressed error message (keyed by message text), reported in aggregate
    // the next time a log is emitted.
    pub suppressed_errors: std::collections::HashMap<String, usize>,
    // Number of whole minutes that must elapse between emitted error logs.
    minutes_between_reports: i64,
}
85
86impl ThrottledErrorLogger {
87    pub fn new(minutes_between_reports: i64) -> Self {
88        Self {
89            time_of_last_log: fasync::MonotonicInstant::from_nanos(0),
90            suppressed_errors: std::collections::HashMap::new(),
91            minutes_between_reports,
92        }
93    }
94
95    pub fn throttle_log(&mut self, error: String) {
96        let curr_time = fasync::MonotonicInstant::now();
97        let time_since_last_log = curr_time - self.time_of_last_log;
98
99        if time_since_last_log.into_minutes() > self.minutes_between_reports {
100            error!("{}", error);
101            if !self.suppressed_errors.is_empty() {
102                for (log, count) in self.suppressed_errors.iter() {
103                    log::warn!("Suppressed {} instances: {}", count, log);
104                }
105                self.suppressed_errors.clear();
106            }
107            self.time_of_last_log = curr_time;
108        } else {
109            let count = self.suppressed_errors.entry(error).or_default();
110            *count += 1;
111        }
112    }
113
114    pub fn throttle_error(&mut self, result: Result<(), Error>) {
115        if let Err(e) = result {
116            self.throttle_log(e.to_string());
117        }
118    }
119}
120
121/// Attempts to connect to the Cobalt service.
122pub async fn setup_cobalt_proxy()
123-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
124    let cobalt_svc = fuchsia_component::client::connect_to_protocol::<
125        fidl_fuchsia_metrics::MetricEventLoggerFactoryMarker,
126    >()
127    .context("failed to connect to metrics service")?;
128
129    let (cobalt_proxy, cobalt_server) =
130        fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>();
131
132    let project_spec = fidl_fuchsia_metrics::ProjectSpec {
133        customer_id: Some(metrics::CUSTOMER_ID),
134        project_id: Some(metrics::PROJECT_ID),
135        ..Default::default()
136    };
137
138    match cobalt_svc.create_metric_event_logger(&project_spec, cobalt_server).await {
139        Ok(_) => Ok(cobalt_proxy),
140        Err(err) => Err(format_err!("failed to create metrics event logger: {:?}", err)),
141    }
142}
143
144/// Attempts to create a disconnected FIDL channel with types matching the Cobalt service. This
145/// allows for a fallback with a uniform code path in case of a failure to connect to Cobalt.
146pub fn setup_disconnected_cobalt_proxy()
147-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
148    // Create a disconnected proxy
149    Ok(fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>().0)
150}
151
/// How often to refresh time series stats. Also how often to request packet counters.
/// Drives the periodic `telemetry_interval` timer in `serve_telemetry`.
const TELEMETRY_QUERY_INTERVAL: zx::MonotonicDuration = zx::MonotonicDuration::from_seconds(10);
154
/// Sets up the telemetry event pipeline and returns a sender plus the serving future.
///
/// The returned [`TelemetrySender`] is used to submit [`TelemetryEvent`]s; the returned
/// future must be polled to process them. The future loops forever, dispatching each
/// event to the relevant processor modules and running periodic telemetry work every
/// `TELEMETRY_QUERY_INTERVAL`; it only completes — with an error — if the event stream
/// unexpectedly terminates. `inspect_path` is the inspect-tree path of `inspect_node`,
/// used to record where metadata lives.
pub fn serve_telemetry(
    cobalt_proxy: fidl_fuchsia_metrics::MetricEventLoggerProxy,
    monitor_svc_proxy: fidl_fuchsia_wlan_device_service::DeviceMonitorProxy,
    inspect_node: InspectNode,
    inspect_path: &str,
) -> (TelemetrySender, impl Future<Output = Result<(), Error>> + use<>) {
    // Bounded channel through which all telemetry events flow.
    let (sender, mut receiver) =
        mpsc::channel::<TelemetryEvent>(util::sender::TELEMETRY_EVENT_BUFFER_SIZE);
    let sender = TelemetrySender::new(sender);

    // Inspect nodes to hold time series and metadata for other nodes
    const METADATA_NODE_NAME: &str = "metadata";
    let inspect_metadata_node = inspect_node.create_child(METADATA_NODE_NAME);
    let inspect_metadata_path = format!("{inspect_path}/{METADATA_NODE_NAME}");
    let inspect_time_series_node = inspect_node.create_child("time_series");
    let driver_specific_time_series_node = inspect_time_series_node.create_child("driver_specific");
    let driver_counters_time_series_node =
        driver_specific_time_series_node.create_child("counters");
    let driver_gauges_time_series_node = driver_specific_time_series_node.create_child("gauges");

    // Time-matrix clients record windowed time-series data under the nodes above.
    let time_matrix_client = TimeMatrixClient::new(inspect_time_series_node.clone_weak());
    let driver_counters_time_series_client =
        TimeMatrixClient::new(driver_counters_time_series_node.clone_weak());
    let driver_gauges_time_series_client =
        TimeMatrixClient::new(driver_gauges_time_series_node.clone_weak());

    // Create and initialize modules
    let connect_disconnect = processors::connect_disconnect::ConnectDisconnectLogger::new(
        cobalt_proxy.clone(),
        &inspect_node,
        &inspect_metadata_node,
        &inspect_metadata_path,
        &time_matrix_client,
    );
    let iface_logger = processors::iface::IfaceLogger::new(cobalt_proxy.clone());
    let power_logger = processors::power::PowerLogger::new(cobalt_proxy.clone(), &inspect_node);
    let recovery_logger = processors::recovery::RecoveryLogger::new(cobalt_proxy.clone());
    let mut scan_logger =
        processors::scan::ScanLogger::new(cobalt_proxy.clone(), &time_matrix_client);
    let sme_timeout_logger = processors::sme_timeout::SmeTimeoutLogger::new(cobalt_proxy.clone());
    let mut toggle_logger =
        processors::toggle_events::ToggleLogger::new(cobalt_proxy.clone(), &inspect_node);
    let tx_power_scenario_logger =
        processors::tx_power_scenario::TxPowerScenarioLogger::new(cobalt_proxy.clone());

    let client_iface_counters_logger =
        processors::client_iface_counters::ClientIfaceCountersLogger::new(
            cobalt_proxy,
            monitor_svc_proxy,
            &inspect_metadata_node,
            &inspect_metadata_path,
            &time_matrix_client,
            driver_counters_time_series_client,
            driver_gauges_time_series_client,
        );

    let fut = async move {
        // Prevent the inspect nodes from being dropped while the loop is running.
        let _inspect_node = inspect_node;
        let _inspect_metadata_node = inspect_metadata_node;
        let _inspect_time_series_node = inspect_time_series_node;
        let _driver_specific_time_series_node = driver_specific_time_series_node;
        let _driver_counters_time_series_node = driver_counters_time_series_node;
        let _driver_gauges_time_series_node = driver_gauges_time_series_node;

        let mut telemetry_interval = fasync::Interval::new(TELEMETRY_QUERY_INTERVAL);
        loop {
            select! {
                // Dispatch each incoming event to the processors that care about it.
                event = receiver.next() => {
                    let Some(event) = event else {
                        error!("Telemetry event stream unexpectedly terminated.");
                        return Err(format_err!("Telemetry event stream unexpectedly terminated."));
                    };
                    use TelemetryEvent::*;
                    match event {
                        ConnectResult { result, bss, is_credential_rejected } => {
                            connect_disconnect.handle_connect_attempt(result, &bss, is_credential_rejected).await;
                        }
                        Disconnect { info } => {
                            connect_disconnect.log_disconnect(&info).await;
                            power_logger.handle_iface_disconnect(info.iface_id).await;
                        }
                        ClientConnectionsToggle { event } => {
                            connect_disconnect.handle_client_connections_toggle(&event).await;
                            toggle_logger.handle_toggle_event(event).await;
                        }
                        ClientIfaceCreated { iface_id } => {
                            client_iface_counters_logger.handle_iface_created(iface_id).await;
                        }
                        ClientIfaceDestroyed { iface_id } => {
                            connect_disconnect.handle_iface_destroyed().await;
                            client_iface_counters_logger.handle_iface_destroyed(iface_id).await;
                            power_logger.handle_iface_destroyed(iface_id).await;
                        }
                        IfaceCreationFailure => {
                            iface_logger.handle_iface_creation_failure().await;
                        }
                        IfaceDestructionFailure => {
                            iface_logger.handle_iface_destruction_failure().await;
                        }
                        ScanStart => {
                            scan_logger.handle_scan_start().await;
                        }
                        ScanResult { result } => {
                            scan_logger.handle_scan_result(result).await;
                        }
                        IfacePowerLevelChanged { iface_power_level, iface_id } => {
                            power_logger.log_iface_power_event(iface_power_level, iface_id).await;
                        }
                        // TODO(b/340921554): either watch for suspension directly in the library,
                        // or plumb this from callers once suspend mechanisms are integrated
                        SuspendImminent => {
                            power_logger.handle_suspend_imminent().await;
                            connect_disconnect.handle_suspend_imminent().await;
                        }
                        UnclearPowerDemand(demand) => {
                            power_logger.handle_unclear_power_demand(demand).await;
                        }
                        ChipPowerUpFailure => {
                            power_logger.handle_chip_power_up_failure().await;
                            connect_disconnect.handle_client_connections_failed_to_start().await;
                        }
                        ChipPowerDownFailure => {
                            power_logger.chip_power_down_failure().await;
                            connect_disconnect.handle_client_connections_failed_to_stop().await;
                        }
                        BatteryChargeStatus(charge_status) => {
                            scan_logger.handle_battery_charge_status(charge_status).await;
                            toggle_logger.handle_battery_charge_status(charge_status).await;
                        }
                        RecoveryEvent { result } => {
                            recovery_logger.handle_recovery_event(result).await;
                        }
                        SmeTimeout => {
                            sme_timeout_logger.handle_sme_timeout_event().await;
                        }
                        ResetTxPowerScenario => {
                            tx_power_scenario_logger.handle_sar_reset().await;
                        }
                        SetTxPowerScenario {scenario} => {
                            tx_power_scenario_logger.handle_set_sar(scenario).await;
                        }
                    }
                }
                // Periodic work: refresh time-series stats and iface counters.
                _ = telemetry_interval.next() => {
                    connect_disconnect.handle_periodic_telemetry().await;
                    client_iface_counters_logger.handle_periodic_telemetry().await;
                }
            }
        }
    };
    (sender, fut)
}