Skip to main content

wlan_telemetry/
lib.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4use anyhow::{Context as _, Error, format_err};
5use fidl_fuchsia_power_battery as fidl_battery;
6use fidl_fuchsia_wlan_ieee80211 as fidl_ieee80211;
7use fidl_fuchsia_wlan_internal as fidl_internal;
8use fuchsia_async as fasync;
9use fuchsia_inspect::Node as InspectNode;
10use futures::channel::mpsc;
11use futures::{Future, StreamExt, select};
12use log::error;
13use std::boxed::Box;
14use windowed_stats::experimental::inspect::TimeMatrixClient;
15use wlan_common::bss::BssDescription;
16use wlan_legacy_metrics_registry as metrics;
17
18mod convert;
19mod processors;
20pub(crate) mod util;
21pub use crate::processors::connect_disconnect::DisconnectInfo;
22pub use crate::processors::pno_scan::PnoScanDisabledReason;
23pub use crate::processors::power::{IfacePowerLevel, UnclearPowerDemand};
24pub use crate::processors::scan::ScanResult;
25pub use crate::processors::toggle_events::ClientConnectionsToggleEvent;
26pub use util::sender::TelemetrySender;
27#[cfg(test)]
28mod testing;
29
30#[derive(Debug)]
31pub enum TelemetryEvent {
32    ConnectResult {
33        result: fidl_ieee80211::StatusCode,
34        bss: Box<BssDescription>,
35        is_credential_rejected: bool,
36    },
37    Disconnect {
38        info: DisconnectInfo,
39    },
40    // We should maintain docstrings if we can see any possibility of ambiguity for an enum
41    /// Client connections enabled or disabled
42    ClientConnectionsToggle {
43        event: ClientConnectionsToggleEvent,
44    },
45    ClientIfaceCreated {
46        iface_id: u16,
47    },
48    ClientIfaceDestroyed {
49        iface_id: u16,
50    },
51    IfaceCreationFailure,
52    IfaceDestructionFailure,
53    ScanStart,
54    ScanResult {
55        result: ScanResult,
56    },
57    IfacePowerLevelChanged {
58        iface_power_level: IfacePowerLevel,
59        iface_id: u16,
60    },
61    /// System suspension imminent
62    SuspendImminent,
63    /// Unclear power level requested by policy layer
64    UnclearPowerDemand(UnclearPowerDemand),
65    BatteryChargeStatus(fidl_battery::ChargeStatus),
66    RecoveryEvent {
67        result: Result<(), ()>,
68    },
69    SmeTimeout,
70    ChipPowerUpFailure,
71    ChipPowerDownFailure,
72    ResetTxPowerScenario,
73    SetTxPowerScenario {
74        scenario: fidl_internal::TxPowerScenario,
75    },
76    PnoScanFailure,
77    PnoScanEnabled,
78    PnoScanResultsReceived,
79    PnoScanDisabled {
80        reason: PnoScanDisabledReason,
81    },
82}
83
84/// If metrics cannot be reported for extended periods of time, logging new metrics will fail and
85/// the error messages tend to clutter up the logs.  This container limits the rate at which such
86/// potentially noisy logs are reported.  Duplicate error messages are aggregated periodically
87/// reported.
88pub struct ThrottledErrorLogger {
89    time_of_last_log: fasync::MonotonicInstant,
90    pub suppressed_errors: std::collections::HashMap<String, usize>,
91    minutes_between_reports: i64,
92}
93
94impl ThrottledErrorLogger {
95    pub fn new(minutes_between_reports: i64) -> Self {
96        Self {
97            time_of_last_log: fasync::MonotonicInstant::from_nanos(0),
98            suppressed_errors: std::collections::HashMap::new(),
99            minutes_between_reports,
100        }
101    }
102
103    pub fn throttle_log(&mut self, message: String, level: log::Level) {
104        let curr_time = fasync::MonotonicInstant::now();
105        let time_since_last_log = curr_time - self.time_of_last_log;
106
107        if time_since_last_log.into_minutes() > self.minutes_between_reports {
108            log::log!(level, "{}", message);
109            if !self.suppressed_errors.is_empty() {
110                for (log, count) in self.suppressed_errors.iter() {
111                    log::warn!("Suppressed {} instances: {}", count, log);
112                }
113                self.suppressed_errors.clear();
114            }
115            self.time_of_last_log = curr_time;
116        } else {
117            let count = self.suppressed_errors.entry(message).or_default();
118            *count += 1;
119        }
120    }
121
122    pub fn throttle_error(&mut self, result: Result<(), Error>) {
123        if let Err(e) = result {
124            self.throttle_log(e.to_string(), log::Level::Error);
125        }
126    }
127}
128
129/// Attempts to connect to the Cobalt service.
130pub async fn setup_cobalt_proxy()
131-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
132    let cobalt_svc = fuchsia_component::client::connect_to_protocol::<
133        fidl_fuchsia_metrics::MetricEventLoggerFactoryMarker,
134    >()
135    .context("failed to connect to metrics service")?;
136
137    let (cobalt_proxy, cobalt_server) =
138        fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>();
139
140    let project_spec = fidl_fuchsia_metrics::ProjectSpec {
141        customer_id: Some(metrics::CUSTOMER_ID),
142        project_id: Some(metrics::PROJECT_ID),
143        ..Default::default()
144    };
145
146    match cobalt_svc.create_metric_event_logger(&project_spec, cobalt_server).await {
147        Ok(_) => Ok(cobalt_proxy),
148        Err(err) => Err(format_err!("failed to create metrics event logger: {:?}", err)),
149    }
150}
151
152/// Attempts to create a disconnected FIDL channel with types matching the Cobalt service. This
153/// allows for a fallback with a uniform code path in case of a failure to connect to Cobalt.
154pub fn setup_disconnected_cobalt_proxy()
155-> Result<fidl_fuchsia_metrics::MetricEventLoggerProxy, anyhow::Error> {
156    // Create a disconnected proxy
157    Ok(fidl::endpoints::create_proxy::<fidl_fuchsia_metrics::MetricEventLoggerMarker>().0)
158}
159
160/// How often to refresh time series stats. Also how often to request packet counters.
161const TELEMETRY_QUERY_INTERVAL: zx::MonotonicDuration = zx::MonotonicDuration::from_seconds(10);
162
163pub fn serve_telemetry(
164    cobalt_proxy: fidl_fuchsia_metrics::MetricEventLoggerProxy,
165    monitor_svc_proxy: fidl_fuchsia_wlan_device_service::DeviceMonitorProxy,
166    inspect_node: InspectNode,
167    inspect_path: &str,
168) -> (TelemetrySender, impl Future<Output = Result<(), Error>> + use<>) {
169    let (sender, mut receiver) =
170        mpsc::channel::<TelemetryEvent>(util::sender::TELEMETRY_EVENT_BUFFER_SIZE);
171    let sender = TelemetrySender::new(sender);
172
173    // Inspect nodes to hold time series and metadata for other nodes
174    const METADATA_NODE_NAME: &str = "metadata";
175    let inspect_metadata_node = inspect_node.create_child(METADATA_NODE_NAME);
176    let inspect_metadata_path = format!("{inspect_path}/{METADATA_NODE_NAME}");
177    let inspect_time_series_node = inspect_node.create_child("time_series");
178    let driver_specific_time_series_node = inspect_time_series_node.create_child("driver_specific");
179    let driver_counters_time_series_node =
180        driver_specific_time_series_node.create_child("counters");
181    let driver_gauges_time_series_node = driver_specific_time_series_node.create_child("gauges");
182
183    let time_matrix_client = TimeMatrixClient::new(inspect_time_series_node.clone_weak());
184    let driver_counters_time_series_client =
185        TimeMatrixClient::new(driver_counters_time_series_node.clone_weak());
186    let driver_gauges_time_series_client =
187        TimeMatrixClient::new(driver_gauges_time_series_node.clone_weak());
188
189    // Create and initialize modules
190    let connect_disconnect = processors::connect_disconnect::ConnectDisconnectLogger::new(
191        cobalt_proxy.clone(),
192        &inspect_node,
193        &inspect_metadata_node,
194        &inspect_metadata_path,
195        &time_matrix_client,
196    );
197    let iface_logger = processors::iface::IfaceLogger::new(cobalt_proxy.clone());
198    let power_logger = processors::power::PowerLogger::new(cobalt_proxy.clone(), &inspect_node);
199    let recovery_logger = processors::recovery::RecoveryLogger::new(cobalt_proxy.clone());
200    let mut scan_logger =
201        processors::scan::ScanLogger::new(cobalt_proxy.clone(), &time_matrix_client);
202    let mut pno_scan_logger = processors::pno_scan::PnoScanLogger::new(cobalt_proxy.clone());
203    let sme_timeout_logger = processors::sme_timeout::SmeTimeoutLogger::new(cobalt_proxy.clone());
204    let mut toggle_logger =
205        processors::toggle_events::ToggleLogger::new(cobalt_proxy.clone(), &inspect_node);
206    let tx_power_scenario_logger =
207        processors::tx_power_scenario::TxPowerScenarioLogger::new(cobalt_proxy.clone());
208
209    let client_iface_counters_logger =
210        processors::client_iface_counters::ClientIfaceCountersLogger::new(
211            cobalt_proxy,
212            monitor_svc_proxy,
213            &inspect_metadata_node,
214            &inspect_metadata_path,
215            &time_matrix_client,
216            driver_counters_time_series_client,
217            driver_gauges_time_series_client,
218        );
219
220    let fut = async move {
221        // Prevent the inspect nodes from being dropped while the loop is running.
222        let _inspect_node = inspect_node;
223        let _inspect_metadata_node = inspect_metadata_node;
224        let _inspect_time_series_node = inspect_time_series_node;
225        let _driver_specific_time_series_node = driver_specific_time_series_node;
226        let _driver_counters_time_series_node = driver_counters_time_series_node;
227        let _driver_gauges_time_series_node = driver_gauges_time_series_node;
228
229        let mut telemetry_interval = fasync::Interval::new(TELEMETRY_QUERY_INTERVAL);
230        loop {
231            select! {
232                event = receiver.next() => {
233                    let Some(event) = event else {
234                        error!("Telemetry event stream unexpectedly terminated.");
235                        return Err(format_err!("Telemetry event stream unexpectedly terminated."));
236                    };
237                    use TelemetryEvent::*;
238                    match event {
239                        ConnectResult { result, bss, is_credential_rejected } => {
240                            connect_disconnect.handle_connect_attempt(result, &bss, is_credential_rejected).await;
241                        }
242                        Disconnect { info } => {
243                            connect_disconnect.log_disconnect(&info).await;
244                            power_logger.handle_iface_disconnect(info.iface_id).await;
245                        }
246                        ClientConnectionsToggle { event } => {
247                            connect_disconnect.handle_client_connections_toggle(&event).await;
248                            toggle_logger.handle_toggle_event(event).await;
249                        }
250                        ClientIfaceCreated { iface_id } => {
251                            client_iface_counters_logger.handle_iface_created(iface_id).await;
252                        }
253                        ClientIfaceDestroyed { iface_id } => {
254                            connect_disconnect.handle_iface_destroyed().await;
255                            client_iface_counters_logger.handle_iface_destroyed(iface_id).await;
256                            power_logger.handle_iface_destroyed(iface_id).await;
257                        }
258                        IfaceCreationFailure => {
259                            iface_logger.handle_iface_creation_failure().await;
260                        }
261                        IfaceDestructionFailure => {
262                            iface_logger.handle_iface_destruction_failure().await;
263                        }
264                        ScanStart => {
265                            scan_logger.handle_scan_start().await;
266                        }
267                        ScanResult { result } => {
268                            scan_logger.handle_scan_result(result).await;
269                        }
270                        IfacePowerLevelChanged { iface_power_level, iface_id } => {
271                            power_logger.log_iface_power_event(iface_power_level, iface_id).await;
272                        }
273                        // TODO(b/340921554): either watch for suspension directly in the library,
274                        // or plumb this from callers once suspend mechanisms are integrated
275                        SuspendImminent => {
276                            power_logger.handle_suspend_imminent().await;
277                            connect_disconnect.handle_suspend_imminent().await;
278                        }
279                        UnclearPowerDemand(demand) => {
280                            power_logger.handle_unclear_power_demand(demand).await;
281                        }
282                        ChipPowerUpFailure => {
283                            power_logger.handle_chip_power_up_failure().await;
284                            connect_disconnect.handle_client_connections_failed_to_start().await;
285                        }
286                        ChipPowerDownFailure => {
287                            power_logger.chip_power_down_failure().await;
288                            connect_disconnect.handle_client_connections_failed_to_stop().await;
289                        }
290                        BatteryChargeStatus(charge_status) => {
291                            scan_logger.handle_battery_charge_status(charge_status).await;
292                            toggle_logger.handle_battery_charge_status(charge_status).await;
293                        }
294                        RecoveryEvent { result } => {
295                            recovery_logger.handle_recovery_event(result).await;
296                        }
297                        SmeTimeout => {
298                            sme_timeout_logger.handle_sme_timeout_event().await;
299                        }
300                        ResetTxPowerScenario => {
301                            tx_power_scenario_logger.handle_sar_reset().await;
302                        }
303                        SetTxPowerScenario {scenario} => {
304                            tx_power_scenario_logger.handle_set_sar(scenario).await;
305                        }
306                        PnoScanFailure => {
307                            connect_disconnect.handle_pno_scan_failure().await;
308                        }
309                        PnoScanEnabled => {
310                            let is_connected = connect_disconnect.is_connected();
311                            pno_scan_logger.handle_pno_scan_enabled(is_connected).await;
312                        }
313                        PnoScanResultsReceived => {
314                            pno_scan_logger.handle_pno_scan_results_received().await;
315                        }
316                        PnoScanDisabled { reason } => {
317                            pno_scan_logger.handle_pno_scan_disabled(reason).await;
318                        }
319                    }
320                }
321                _ = telemetry_interval.next() => {
322                    connect_disconnect.handle_periodic_telemetry().await;
323                    client_iface_counters_logger.handle_periodic_telemetry().await;
324                    pno_scan_logger.handle_periodic_telemetry().await;
325                }
326            }
327        }
328    };
329    (sender, fut)
330}