Skip to main content

wlan_telemetry/
lib.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4use anyhow::{Context as _, Error, format_err};
5use fidl_fuchsia_power_battery as fidl_battery;
6use fidl_fuchsia_wlan_ieee80211 as fidl_ieee80211;
7use fidl_fuchsia_wlan_internal as fidl_internal;
8use fuchsia_async as fasync;
9use fuchsia_inspect::Node as InspectNode;
10use futures::channel::mpsc;
11use futures::{Future, StreamExt, select};
12use log::error;
13use std::boxed::Box;
14use windowed_stats::experimental::inspect::TimeMatrixClient;
15use wlan_common::bss::BssDescription;
16use wlan_legacy_metrics_registry as metrics;
17
18mod processors;
19pub(crate) mod util;
20pub use crate::processors::connect_disconnect::DisconnectInfo;
21pub use crate::processors::power::{IfacePowerLevel, UnclearPowerDemand};
22pub use crate::processors::scan::ScanResult;
23pub use crate::processors::toggle_events::ClientConnectionsToggleEvent;
24pub use util::sender::TelemetrySender;
25#[cfg(test)]
26mod testing;
27
/// Events accepted by the telemetry event loop.
///
/// Each event sent through a [`TelemetrySender`] is received by the future returned from
/// [`serve_telemetry`], which dispatches it to one or more of the internal processors.
#[derive(Debug)]
pub enum TelemetryEvent {
    /// Result of a connection attempt, together with the associated BSS description and
    /// whether the credential was rejected.
    ConnectResult {
        result: fidl_ieee80211::StatusCode,
        bss: Box<BssDescription>,
        is_credential_rejected: bool,
    },
    /// A disconnect, described by `info`.
    Disconnect {
        info: DisconnectInfo,
    },
    // We should maintain docstrings if we can see any possibility of ambiguity for an enum
    /// Client connections enabled or disabled
    ClientConnectionsToggle {
        event: ClientConnectionsToggleEvent,
    },
    /// A client iface with the given ID was created.
    ClientIfaceCreated {
        iface_id: u16,
    },
    /// The client iface with the given ID was destroyed.
    ClientIfaceDestroyed {
        iface_id: u16,
    },
    /// An iface creation failure; forwarded to the iface logger.
    IfaceCreationFailure,
    /// An iface destruction failure; forwarded to the iface logger.
    IfaceDestructionFailure,
    /// A scan has started.
    ScanStart,
    /// A scan has produced a result.
    ScanResult {
        result: ScanResult,
    },
    /// The power level of the iface identified by `iface_id` changed to `iface_power_level`.
    IfacePowerLevelChanged {
        iface_power_level: IfacePowerLevel,
        iface_id: u16,
    },
    /// System suspension imminent
    SuspendImminent,
    /// Unclear power level requested by policy layer
    UnclearPowerDemand(UnclearPowerDemand),
    /// Battery charge status update; forwarded to both the scan and toggle loggers.
    BatteryChargeStatus(fidl_battery::ChargeStatus),
    /// A recovery event; forwarded to the recovery logger.
    // NOTE(review): `result` presumably reports whether the recovery succeeded -- confirm
    // against the callers that emit this event.
    RecoveryEvent {
        result: Result<(), ()>,
    },
    /// An SME timeout; forwarded to the SME timeout logger.
    SmeTimeout,
    /// Chip power-up failed. Also reported to the connect/disconnect logger as client
    /// connections having failed to start.
    ChipPowerUpFailure,
    /// Chip power-down failed. Also reported to the connect/disconnect logger as client
    /// connections having failed to stop.
    ChipPowerDownFailure,
    /// The TX power scenario was reset; handled by the TX power scenario logger as a SAR reset.
    ResetTxPowerScenario,
    /// The TX power scenario was set to `scenario`; handled by the TX power scenario logger.
    SetTxPowerScenario {
        scenario: fidl_internal::TxPowerScenario,
    },
    /// A PNO scan failure; forwarded to the connect/disconnect logger.
    PnoScanFailure,
}
76
/// If metrics cannot be reported for extended periods of time, logging new metrics will fail and
/// the error messages tend to clutter up the logs.  This container limits the rate at which such
/// potentially noisy logs are reported.  Duplicate error messages are aggregated and periodically
/// reported.
pub struct ThrottledErrorLogger {
    /// Monotonic time at which an error was last written to the log.
    time_of_last_log: fasync::MonotonicInstant,
    /// Errors withheld since the last report, keyed by message text and mapped to the number of
    /// times that message was suppressed.
    pub suppressed_errors: std::collections::HashMap<String, usize>,
    /// Reporting threshold in minutes: an error is only written to the log when the whole number
    /// of minutes elapsed since the last log strictly exceeds this value.
    minutes_between_reports: i64,
}
86
87impl ThrottledErrorLogger {
88    pub fn new(minutes_between_reports: i64) -> Self {
89        Self {
90            time_of_last_log: fasync::MonotonicInstant::from_nanos(0),
91            suppressed_errors: std::collections::HashMap::new(),
92            minutes_between_reports,
93        }
94    }
95
96    pub fn throttle_log(&mut self, error: String) {
97        let curr_time = fasync::MonotonicInstant::now();
98        let time_since_last_log = curr_time - self.time_of_last_log;
99
100        if time_since_last_log.into_minutes() > self.minutes_between_reports {
101            error!("{}", error);
102            if !self.suppressed_errors.is_empty() {
103                for (log, count) in self.suppressed_errors.iter() {
104                    log::warn!("Suppressed {} instances: {}", count, log);
105                }
106                self.suppressed_errors.clear();
107            }
108            self.time_of_last_log = curr_time;
109        } else {
110            let count = self.suppressed_errors.entry(error).or_default();
111            *count += 1;
112        }
113    }
114
115    pub fn throttle_error(&mut self, result: Result<(), Error>) {
116        if let Err(e) = result {
117            self.throttle_log(e.to_string());
118        }
119    }
120}
121
122/// Attempts to connect to the Cobalt service.
123pub async fn setup_cobalt_proxy()
124-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
125    let cobalt_svc = fuchsia_component::client::connect_to_protocol::<
126        fidl_fuchsia_metrics::MetricEventLoggerFactoryMarker,
127    >()
128    .context("failed to connect to metrics service")?;
129
130    let (cobalt_proxy, cobalt_server) =
131        fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>();
132
133    let project_spec = fidl_fuchsia_metrics::ProjectSpec {
134        customer_id: Some(metrics::CUSTOMER_ID),
135        project_id: Some(metrics::PROJECT_ID),
136        ..Default::default()
137    };
138
139    match cobalt_svc.create_metric_event_logger(&project_spec, cobalt_server).await {
140        Ok(_) => Ok(cobalt_proxy),
141        Err(err) => Err(format_err!("failed to create metrics event logger: {:?}", err)),
142    }
143}
144
145/// Attempts to create a disconnected FIDL channel with types matching the Cobalt service. This
146/// allows for a fallback with a uniform code path in case of a failure to connect to Cobalt.
147pub fn setup_disconnected_cobalt_proxy()
148-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
149    // Create a disconnected proxy
150    Ok(fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>().0)
151}
152
/// How often to refresh time series stats. Also how often to request packet counters.
///
/// This is the period (10 seconds) of the interval timer in `serve_telemetry`; each tick
/// invokes the periodic telemetry handlers of the connect/disconnect and client iface counters
/// loggers.
const TELEMETRY_QUERY_INTERVAL: zx::MonotonicDuration = zx::MonotonicDuration::from_seconds(10);
155
/// Sets up the telemetry processors and returns a sender for [`TelemetryEvent`]s together with
/// the future that processes them.
///
/// The returned future must be polled for any telemetry to be handled. It loops, dispatching
/// each received event to the relevant processors and running periodic telemetry every
/// `TELEMETRY_QUERY_INTERVAL`.
///
/// # Arguments
///
/// * `cobalt_proxy` - metrics logger proxy; a clone is handed to each processor.
/// * `monitor_svc_proxy` - device monitor proxy, handed to the client iface counters logger.
/// * `inspect_node` - root Inspect node under which the `metadata` and `time_series` children
///   are created.
/// * `inspect_path` - path of `inspect_node`; used to derive the path of the `metadata` child
///   as `{inspect_path}/metadata`.
///
/// # Errors
///
/// The future only completes when the event stream terminates, in which case it resolves to an
/// error. It never resolves to `Ok`.
pub fn serve_telemetry(
    cobalt_proxy: fidl_fuchsia_metrics::MetricEventLoggerProxy,
    monitor_svc_proxy: fidl_fuchsia_wlan_device_service::DeviceMonitorProxy,
    inspect_node: InspectNode,
    inspect_path: &str,
) -> (TelemetrySender, impl Future<Output = Result<(), Error>> + use<>) {
    let (sender, mut receiver) =
        mpsc::channel::<TelemetryEvent>(util::sender::TELEMETRY_EVENT_BUFFER_SIZE);
    let sender = TelemetrySender::new(sender);

    // Inspect nodes to hold time series and metadata for other nodes
    const METADATA_NODE_NAME: &str = "metadata";
    let inspect_metadata_node = inspect_node.create_child(METADATA_NODE_NAME);
    let inspect_metadata_path = format!("{inspect_path}/{METADATA_NODE_NAME}");
    let inspect_time_series_node = inspect_node.create_child("time_series");
    let driver_specific_time_series_node = inspect_time_series_node.create_child("driver_specific");
    let driver_counters_time_series_node =
        driver_specific_time_series_node.create_child("counters");
    let driver_gauges_time_series_node = driver_specific_time_series_node.create_child("gauges");

    // The clients only receive weak clones of the nodes; the owning nodes are moved into the
    // future below so they are not dropped while it runs.
    let time_matrix_client = TimeMatrixClient::new(inspect_time_series_node.clone_weak());
    let driver_counters_time_series_client =
        TimeMatrixClient::new(driver_counters_time_series_node.clone_weak());
    let driver_gauges_time_series_client =
        TimeMatrixClient::new(driver_gauges_time_series_node.clone_weak());

    // Create and initialize modules
    let connect_disconnect = processors::connect_disconnect::ConnectDisconnectLogger::new(
        cobalt_proxy.clone(),
        &inspect_node,
        &inspect_metadata_node,
        &inspect_metadata_path,
        &time_matrix_client,
    );
    let iface_logger = processors::iface::IfaceLogger::new(cobalt_proxy.clone());
    let power_logger = processors::power::PowerLogger::new(cobalt_proxy.clone(), &inspect_node);
    let recovery_logger = processors::recovery::RecoveryLogger::new(cobalt_proxy.clone());
    let mut scan_logger =
        processors::scan::ScanLogger::new(cobalt_proxy.clone(), &time_matrix_client);
    let sme_timeout_logger = processors::sme_timeout::SmeTimeoutLogger::new(cobalt_proxy.clone());
    let mut toggle_logger =
        processors::toggle_events::ToggleLogger::new(cobalt_proxy.clone(), &inspect_node);
    let tx_power_scenario_logger =
        processors::tx_power_scenario::TxPowerScenarioLogger::new(cobalt_proxy.clone());

    // Last user of `cobalt_proxy`, so it takes the original instead of another clone.
    let client_iface_counters_logger =
        processors::client_iface_counters::ClientIfaceCountersLogger::new(
            cobalt_proxy,
            monitor_svc_proxy,
            &inspect_metadata_node,
            &inspect_metadata_path,
            &time_matrix_client,
            driver_counters_time_series_client,
            driver_gauges_time_series_client,
        );

    let fut = async move {
        // Prevent the inspect nodes from being dropped while the loop is running.
        let _inspect_node = inspect_node;
        let _inspect_metadata_node = inspect_metadata_node;
        let _inspect_time_series_node = inspect_time_series_node;
        let _driver_specific_time_series_node = driver_specific_time_series_node;
        let _driver_counters_time_series_node = driver_counters_time_series_node;
        let _driver_gauges_time_series_node = driver_gauges_time_series_node;

        let mut telemetry_interval = fasync::Interval::new(TELEMETRY_QUERY_INTERVAL);
        loop {
            select! {
                event = receiver.next() => {
                    // `None` means the event stream has ended; this is the only way the
                    // loop exits, and it is treated as an error.
                    let Some(event) = event else {
                        error!("Telemetry event stream unexpectedly terminated.");
                        return Err(format_err!("Telemetry event stream unexpectedly terminated."));
                    };
                    use TelemetryEvent::*;
                    // Some events are forwarded to more than one processor; each handler is
                    // awaited to completion before the next one is invoked.
                    match event {
                        ConnectResult { result, bss, is_credential_rejected } => {
                            connect_disconnect.handle_connect_attempt(result, &bss, is_credential_rejected).await;
                        }
                        Disconnect { info } => {
                            connect_disconnect.log_disconnect(&info).await;
                            power_logger.handle_iface_disconnect(info.iface_id).await;
                        }
                        ClientConnectionsToggle { event } => {
                            connect_disconnect.handle_client_connections_toggle(&event).await;
                            toggle_logger.handle_toggle_event(event).await;
                        }
                        ClientIfaceCreated { iface_id } => {
                            client_iface_counters_logger.handle_iface_created(iface_id).await;
                        }
                        ClientIfaceDestroyed { iface_id } => {
                            connect_disconnect.handle_iface_destroyed().await;
                            client_iface_counters_logger.handle_iface_destroyed(iface_id).await;
                            power_logger.handle_iface_destroyed(iface_id).await;
                        }
                        IfaceCreationFailure => {
                            iface_logger.handle_iface_creation_failure().await;
                        }
                        IfaceDestructionFailure => {
                            iface_logger.handle_iface_destruction_failure().await;
                        }
                        ScanStart => {
                            scan_logger.handle_scan_start().await;
                        }
                        ScanResult { result } => {
                            scan_logger.handle_scan_result(result).await;
                        }
                        IfacePowerLevelChanged { iface_power_level, iface_id } => {
                            power_logger.log_iface_power_event(iface_power_level, iface_id).await;
                        }
                        // TODO(b/340921554): either watch for suspension directly in the library,
                        // or plumb this from callers once suspend mechanisms are integrated
                        SuspendImminent => {
                            power_logger.handle_suspend_imminent().await;
                            connect_disconnect.handle_suspend_imminent().await;
                        }
                        UnclearPowerDemand(demand) => {
                            power_logger.handle_unclear_power_demand(demand).await;
                        }
                        ChipPowerUpFailure => {
                            power_logger.handle_chip_power_up_failure().await;
                            connect_disconnect.handle_client_connections_failed_to_start().await;
                        }
                        ChipPowerDownFailure => {
                            power_logger.chip_power_down_failure().await;
                            connect_disconnect.handle_client_connections_failed_to_stop().await;
                        }
                        BatteryChargeStatus(charge_status) => {
                            scan_logger.handle_battery_charge_status(charge_status).await;
                            toggle_logger.handle_battery_charge_status(charge_status).await;
                        }
                        RecoveryEvent { result } => {
                            recovery_logger.handle_recovery_event(result).await;
                        }
                        SmeTimeout => {
                            sme_timeout_logger.handle_sme_timeout_event().await;
                        }
                        ResetTxPowerScenario => {
                            tx_power_scenario_logger.handle_sar_reset().await;
                        }
                        SetTxPowerScenario {scenario} => {
                            tx_power_scenario_logger.handle_set_sar(scenario).await;
                        }
                        PnoScanFailure => {
                            connect_disconnect.handle_pno_scan_failure().await;
                        }
                    }
                }
                // Fires every `TELEMETRY_QUERY_INTERVAL`.
                _ = telemetry_interval.next() => {
                    connect_disconnect.handle_periodic_telemetry().await;
                    client_iface_counters_logger.handle_periodic_telemetry().await;
                }
            }
        }
    };
    (sender, fut)
}