netstack3_filter/
conntrack.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod tcp;
6
7use alloc::collections::HashMap;
8use alloc::fmt::Debug;
9use alloc::sync::{Arc, Weak};
10use alloc::vec::Vec;
11use assert_matches::assert_matches;
12use core::any::Any;
13use core::fmt::Display;
14use core::hash::Hash;
15use core::time::Duration;
16
17use derivative::Derivative;
18use net_types::ip::{GenericOverIp, Ip, IpVersionMarker};
19use packet_formats::ip::{IpExt, IpProto, Ipv4Proto, Ipv6Proto};
20
21use crate::context::{FilterBindingsContext, FilterBindingsTypes};
22use crate::logic::FilterTimerId;
23use crate::packets::{IpPacket, MaybeTransportPacket, TransportPacketData};
24use netstack3_base::sync::Mutex;
25use netstack3_base::{CoreTimerContext, Inspectable, Inspector, Instant, TimerContext};
26
/// The time from the end of one GC cycle to the beginning of the next.
///
/// This is the delay handed to the timer context every time the conntrack
/// garbage collection timer is (re)scheduled.
const GC_INTERVAL: Duration = Duration::from_secs(10);

/// The time since the last seen packet after which an established UDP
/// connection will be considered expired and is eligible for garbage
/// collection.
///
/// This was taken from RFC 4787 REQ-5.
const CONNECTION_EXPIRY_TIME_UDP: Duration = Duration::from_secs(120);

/// The time since the last seen packet after which a generic connection will be
/// considered expired and is eligible for garbage collection.
const CONNECTION_EXPIRY_OTHER: Duration = Duration::from_secs(30);

/// The maximum number of entries in the conntrack table.
///
/// Once the table holds this many entries, finalizing a new connection first
/// has to evict an existing one (or fail if nothing can be evicted).
///
/// NOTE: This is subtly different from the number of connections in the table
/// because self-connected sockets only have a single entry instead of the
/// normal two.
pub(crate) const MAXIMUM_ENTRIES: usize = 100_000;
47
/// Implements a connection tracking subsystem.
///
/// All mutable state lives in a single [`TableInner`] guarded by one mutex, so
/// every operation on the table takes that lock.
///
/// The `E` parameter is for external data that is stored in the [`Connection`]
/// struct and can be extracted with the [`Connection::external_data()`]
/// function.
pub struct Table<I: IpExt, E, BT: FilterBindingsTypes> {
    // The map, GC timer and counters, all protected by the same lock.
    inner: Mutex<TableInner<I, E, BT>>,
}
56
/// The lock-protected state of a [`Table`].
struct TableInner<I: IpExt, E, BT: FilterBindingsTypes> {
    /// A connection is inserted into the map twice: once for the original
    /// tuple, and once for the reply tuple.
    ///
    /// The exception is a self-connected flow, whose original and reply
    /// tuples are equal and which therefore occupies a single entry.
    table: HashMap<Tuple<I>, Arc<ConnectionShared<I, E, BT>>>,
    /// A timer for triggering garbage collection events.
    gc_timer: BT::Timer,
    /// The number of times the table size limit was hit.
    ///
    /// Incremented with saturating arithmetic.
    table_limit_hits: u32,
    /// Of the times the table limit was hit, the number of times we had to drop
    /// a packet because we couldn't make space in the table.
    ///
    /// Incremented with saturating arithmetic.
    table_limit_drops: u32,
}
69
70impl<I: IpExt, E, BT: FilterBindingsTypes> Table<I, E, BT> {
71    /// Returns whether the table contains a connection for the specified tuple.
72    ///
73    /// This is for NAT to determine whether a generated tuple will clash with
74    /// one already in the map. While it might seem inefficient, to require
75    /// locking in a loop, taking an uncontested lock is going to be
76    /// significantly faster than the RNG used to allocate NAT parameters.
77    pub fn contains_tuple(&self, tuple: &Tuple<I>) -> bool {
78        self.inner.lock().table.contains_key(tuple)
79    }
80
81    /// Returns a [`Connection`] for the flow indexed by `tuple`, if one exists.
82    pub(crate) fn get_shared_connection(
83        &self,
84        tuple: &Tuple<I>,
85    ) -> Option<Arc<ConnectionShared<I, E, BT>>> {
86        let guard = self.inner.lock();
87        let conn = guard.table.get(&tuple)?;
88        Some(conn.clone())
89    }
90
91    /// Returns a [`Connection`] for the flow indexed by `tuple`, if one exists.
92    pub fn get_connection(&self, tuple: &Tuple<I>) -> Option<Connection<I, E, BT>> {
93        let guard = self.inner.lock();
94        let conn = guard.table.get(&tuple)?;
95        Some(Connection::Shared(conn.clone()))
96    }
97
98    /// Returns the number of entries in the table.
99    ///
100    /// NOTE: This is usually twice the number of connections, but self-connected sockets will only
101    /// have a single entry.
102    #[cfg(feature = "testutils")]
103    pub fn num_entries(&self) -> usize {
104        self.inner.lock().table.len()
105    }
106
107    /// Removes the [`Connection`] for the flow indexed by `tuple`, if one exists,
108    /// and returns it to the caller.
109    #[cfg(feature = "testutils")]
110    pub fn remove_connection(&mut self, tuple: &Tuple<I>) -> Option<Connection<I, E, BT>> {
111        let mut guard = self.inner.lock();
112
113        // Remove the entry indexed by the tuple.
114        let conn = guard.table.remove(&tuple)?;
115        let (original, reply) = (&conn.inner.original_tuple, &conn.inner.reply_tuple);
116
117        // If this is not a self-connected flow, we need to remove the other tuple on
118        // which the connection is indexed.
119        if original != reply {
120            if tuple == original {
121                assert!(guard.table.remove(reply).is_some());
122            } else {
123                assert!(guard.table.remove(original).is_some());
124            }
125        }
126
127        Some(Connection::Shared(conn))
128    }
129}
130
131fn schedule_gc<BC>(bindings_ctx: &mut BC, timer: &mut BC::Timer)
132where
133    BC: TimerContext,
134{
135    let _ = bindings_ctx.schedule_timer(GC_INTERVAL, timer);
136}
137
138impl<I: IpExt, E, BC: FilterBindingsContext> Table<I, E, BC> {
139    pub(crate) fn new<CC: CoreTimerContext<FilterTimerId<I>, BC>>(bindings_ctx: &mut BC) -> Self {
140        Self {
141            inner: Mutex::new(TableInner {
142                table: HashMap::new(),
143                gc_timer: CC::new_timer(
144                    bindings_ctx,
145                    FilterTimerId::ConntrackGc(IpVersionMarker::<I>::new()),
146                ),
147                table_limit_hits: 0,
148                table_limit_drops: 0,
149            }),
150        }
151    }
152}
153
154impl<
155        I: IpExt,
156        E: Default + Send + Sync + PartialEq + CompatibleWith + 'static,
157        BC: FilterBindingsContext,
158    > Table<I, E, BC>
159{
160    /// Attempts to insert the `Connection` into the table.
161    ///
162    /// To be called once a packet for the connection has passed all filtering.
163    /// The boolean return value represents whether the connection was newly
164    /// added to the connection tracking state.
165    ///
166    /// This is on [`Table`] instead of [`Connection`] because conntrack needs
167    /// to be able to manipulate its internal map.
168    pub(crate) fn finalize_connection(
169        &self,
170        bindings_ctx: &mut BC,
171        connection: Connection<I, E, BC>,
172    ) -> Result<(bool, Option<Arc<ConnectionShared<I, E, BC>>>), FinalizeConnectionError> {
173        let exclusive = match connection {
174            Connection::Exclusive(c) => c,
175            // Given that make_shared is private, the only way for us to receive
176            // a shared connection is if it was already present in the map. This
177            // is far and away the most common case under normal operation.
178            Connection::Shared(inner) => return Ok((false, Some(inner))),
179        };
180
181        if exclusive.do_not_insert {
182            return Ok((false, None));
183        }
184
185        let mut guard = self.inner.lock();
186
187        if guard.table.len() >= MAXIMUM_ENTRIES {
188            guard.table_limit_hits = guard.table_limit_hits.saturating_add(1);
189
190            struct Info<'a, I: IpExt, BT: FilterBindingsTypes> {
191                original_tuple: &'a Tuple<I>,
192                reply_tuple: &'a Tuple<I>,
193                lifecycle: EstablishmentLifecycle,
194                last_seen: BT::Instant,
195            }
196
197            let mut info: Option<Info<'_, I, BC>> = None;
198
199            let now = bindings_ctx.now();
200            // Find a non-established connection to evict.
201            //
202            // 1. If a connection is expired, immediately choose it.
203            // 2. Otherwise, pick the connection that is "least established".
204            //    - SeenOriginal is less established than SeenReply
205            //    - A connection is less established than another with the same
206            //      establishment lifecycle if it saw a packet less recently.
207            //
208            // If all connections are established, then we can't free any space
209            // and report the error to the caller.
210            for (_, conn) in &guard.table {
211                let state = conn.state.lock();
212                if state.is_expired(now) {
213                    info = Some(Info {
214                        original_tuple: &conn.inner.original_tuple,
215                        reply_tuple: &conn.inner.reply_tuple,
216                        lifecycle: state.establishment_lifecycle,
217                        last_seen: state.last_packet_time,
218                    });
219                    break;
220                }
221
222                match state.establishment_lifecycle {
223                    EstablishmentLifecycle::SeenOriginal | EstablishmentLifecycle::SeenReply => {
224                        match &info {
225                            None => {
226                                info = Some(Info {
227                                    original_tuple: &conn.inner.original_tuple,
228                                    reply_tuple: &conn.inner.reply_tuple,
229                                    lifecycle: state.establishment_lifecycle,
230                                    last_seen: state.last_packet_time,
231                                })
232                            }
233                            Some(existing) => {
234                                if state.establishment_lifecycle < existing.lifecycle
235                                    || (state.establishment_lifecycle == existing.lifecycle
236                                        && state.last_packet_time < existing.last_seen)
237                                {
238                                    info = Some(Info {
239                                        original_tuple: &conn.inner.original_tuple,
240                                        reply_tuple: &conn.inner.reply_tuple,
241                                        lifecycle: state.establishment_lifecycle,
242                                        last_seen: state.last_packet_time,
243                                    })
244                                }
245                            }
246                        }
247                    }
248                    EstablishmentLifecycle::Established => {}
249                }
250            }
251
252            if let Some(Info { original_tuple, reply_tuple, .. }) = info {
253                let original_tuple = original_tuple.clone();
254                let reply_tuple = reply_tuple.clone();
255
256                assert!(guard.table.remove(&original_tuple).is_some());
257                if original_tuple != reply_tuple {
258                    assert!(guard.table.remove(&reply_tuple).is_some());
259                }
260            } else {
261                guard.table_limit_drops = guard.table_limit_drops.saturating_add(1);
262                return Err(FinalizeConnectionError::TableFull);
263            }
264        }
265
266        // The expected case here is that there isn't a conflict.
267        //
268        // Normally, we'd want to use the entry API to reduce the number of map
269        // lookups, but this setup allows us to completely avoid any heap
270        // allocations until we're sure that the insertion will succeed. This
271        // wastes a little CPU in the common case to avoid pathological behavior
272        // in degenerate cases.
273        if guard.table.contains_key(&exclusive.inner.original_tuple)
274            || guard.table.contains_key(&exclusive.inner.reply_tuple)
275        {
276            // NOTE: It's possible for the first two packets (or more) in the
277            // same flow to create ExclusiveConnections. Typically packets for
278            // the same flow are handled sequentically, so each subsequent
279            // packet should see the connection created by the first one.
280            // However, it is possible (e.g. if these two packets arrive on
281            // different interfaces) for them to race.
282            //
283            // In this case, subsequent packets would be reported as conflicts.
284            // To avoid this race condition, we check whether the conflicting
285            // connection in the table is actually the same as the connection
286            // that we are attempting to finalize; if so, we can simply adopt
287            // the already-finalized connection.
288            let conn = if let Some(conn) = guard.table.get(&exclusive.inner.original_tuple) {
289                conn
290            } else {
291                guard
292                    .table
293                    .get(&exclusive.inner.reply_tuple)
294                    .expect("checked that tuple is in table and table is locked")
295            };
296            if conn.compatible_with(&exclusive) {
297                return Ok((false, Some(conn.clone())));
298            }
299
300            // TODO(https://fxbug.dev/372549231): add a counter for this error.
301            Err(FinalizeConnectionError::Conflict)
302        } else {
303            let shared = exclusive.make_shared();
304            let clone = Arc::clone(&shared);
305
306            let res = guard.table.insert(shared.inner.original_tuple.clone(), shared.clone());
307            debug_assert!(res.is_none());
308
309            if shared.inner.reply_tuple != shared.inner.original_tuple {
310                let res = guard.table.insert(shared.inner.reply_tuple.clone(), shared);
311                debug_assert!(res.is_none());
312            }
313
314            // For the most part, this will only schedule the timer once, when
315            // the first packet hits the netstack. However, since the GC timer
316            // is only rescheduled during GC when the table has entries, it's
317            // possible that this will be called again if the table ever becomes
318            // empty.
319            if bindings_ctx.scheduled_instant(&mut guard.gc_timer).is_none() {
320                schedule_gc(bindings_ctx, &mut guard.gc_timer);
321            }
322
323            Ok((true, Some(clone)))
324        }
325    }
326}
327
328impl<I: IpExt, E: Default, BC: FilterBindingsContext> Table<I, E, BC> {
329    /// Returns a [`Connection`] for the packet's flow. If a connection does not
330    /// currently exist, a new one is created.
331    ///
332    /// At the same time, process the packet for the connection, updating
333    /// internal connection state.
334    ///
335    /// After processing is complete, you must call
336    /// [`finalize_connection`](Table::finalize_connection) with this
337    /// connection.
338    pub(crate) fn get_connection_for_packet_and_update<P: IpPacket<I>>(
339        &self,
340        bindings_ctx: &BC,
341        packet: &P,
342    ) -> Result<Option<(Connection<I, E, BC>, ConnectionDirection)>, GetConnectionError<I, E, BC>>
343    {
344        let Some(packet) = PacketMetadata::new(packet) else {
345            return Ok(None);
346        };
347
348        let mut connection = match self.inner.lock().table.get(&packet.tuple) {
349            Some(connection) => Connection::Shared(connection.clone()),
350            None => match ConnectionExclusive::from_deconstructed_packet(bindings_ctx, &packet) {
351                None => return Ok(None),
352                Some(c) => Connection::Exclusive(c),
353            },
354        };
355
356        let direction = connection
357            .direction(&packet.tuple)
358            .expect("tuple must match connection as we just looked up connection by tuple");
359
360        match connection.update(bindings_ctx, &packet, direction) {
361            Ok(ConnectionUpdateAction::NoAction) => Ok(Some((connection, direction))),
362            Ok(ConnectionUpdateAction::RemoveEntry) => match connection {
363                Connection::Exclusive(mut conn) => {
364                    conn.do_not_insert = true;
365                    Ok(Some((Connection::Exclusive(conn), direction)))
366                }
367                Connection::Shared(conn) => {
368                    // RACE: It's possible that GC already removed the
369                    // connection from the table, since we released the table
370                    // lock while updating the connection.
371                    let mut guard = self.inner.lock();
372                    let _ = guard.table.remove(&conn.inner.original_tuple);
373                    let _ = guard.table.remove(&conn.inner.reply_tuple);
374
375                    Ok(Some((Connection::Shared(conn), direction)))
376                }
377            },
378            Err(ConnectionUpdateError::InvalidPacket) => {
379                Err(GetConnectionError::InvalidPacket(connection, direction))
380            }
381        }
382    }
383
384    pub(crate) fn perform_gc(&self, bindings_ctx: &mut BC) {
385        let now = bindings_ctx.now();
386        let mut guard = self.inner.lock();
387
388        // Sadly, we can't easily remove entries from the map in-place for two
389        // reasons:
390        // - HashMap::retain() will look at each connection twice, since it will
391        // be inserted under both tuples. If a packet updates last_packet_time
392        // between these two checks, we might remove one tuple of the connection
393        // but not the other, leaving a single tuple in the table, which breaks
394        // a core invariant.
395        // - You can't modify a std::HashMap while iterating over it.
396        let to_remove: Vec<_> = guard
397            .table
398            .iter()
399            .filter_map(|(tuple, conn)| {
400                if *tuple == conn.inner.original_tuple && conn.is_expired(now) {
401                    Some((conn.inner.original_tuple.clone(), conn.inner.reply_tuple.clone()))
402                } else {
403                    None
404                }
405            })
406            .collect();
407
408        for (original_tuple, reply_tuple) in to_remove {
409            assert!(guard.table.remove(&original_tuple).is_some());
410            if reply_tuple != original_tuple {
411                assert!(guard.table.remove(&reply_tuple).is_some());
412            }
413        }
414
415        // The table is only expected to be empty in exceptional cases, or
416        // during tests. The test case especially important, because some tests
417        // will wait for core to quiesce by waiting for timers to stop firing.
418        // By only rescheduling when there are still entries in the table, we
419        // ensure that we won't enter an infinite timer firing/scheduling loop.
420        if !guard.table.is_empty() {
421            schedule_gc(bindings_ctx, &mut guard.gc_timer);
422        }
423    }
424}
425
426impl<I: IpExt, E: Inspectable, BT: FilterBindingsTypes> Inspectable for Table<I, E, BT> {
427    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
428        let guard = self.inner.lock();
429
430        inspector.record_usize("num_entries", guard.table.len());
431        inspector.record_uint("table_limit_hits", guard.table_limit_hits);
432        inspector.record_uint("table_limit_drops", guard.table_limit_drops);
433
434        inspector.record_child("connections", |inspector| {
435            guard
436                .table
437                .iter()
438                .filter_map(|(tuple, connection)| {
439                    if *tuple == connection.inner.original_tuple {
440                        Some(connection)
441                    } else {
442                        None
443                    }
444                })
445                .for_each(|connection| {
446                    inspector.record_unnamed_child(|inspector| {
447                        inspector.delegate_inspectable(connection.as_ref())
448                    });
449                });
450        });
451    }
452}
453
/// A tuple for a flow in a single direction.
///
/// Tuples are the keys of the conntrack table: every connection is indexed by
/// the tuple of its original direction and by the tuple of its reply
/// direction.
#[derive(Debug, Clone, PartialEq, Eq, Hash, GenericOverIp)]
#[generic_over_ip(I, Ip)]
pub struct Tuple<I: IpExt> {
    /// The IP protocol number of the flow.
    pub protocol: TransportProtocol,
    /// The source IP address of the flow.
    pub src_addr: I::Addr,
    /// The destination IP address of the flow.
    pub dst_addr: I::Addr,
    /// The transport-layer source port or ID of the flow.
    pub src_port_or_id: u16,
    /// The transport-layer destination port or ID of the flow.
    pub dst_port_or_id: u16,
}
469
470impl<I: IpExt> Tuple<I> {
471    /// Creates a `Tuple` from an `IpPacket`, if possible.
472    ///
473    /// Returns `None` if the packet doesn't have an inner transport packet.
474    #[cfg(test)]
475    pub(crate) fn from_packet<'a, P: IpPacket<I>>(packet: &'a P) -> Option<Self> {
476        // Subtlety: For ICMP packets, only request/response messages will have
477        // a transport packet defined (and currently only ECHO messages do).
478        // This gets us basic tracking for free, and lets us implicitly ignore
479        // ICMP errors, which are not meant to be tracked.
480        //
481        // If other message types eventually have TransportPacket impls, then
482        // this would lead to confusing different message types that happen to
483        // have the same ID.
484        let transport_packet_data = packet.maybe_transport_packet().transport_packet_data()?;
485        Some(Self::from_packet_and_transport_data(packet, &transport_packet_data))
486    }
487
488    fn from_packet_and_transport_data<'a, P: IpPacket<I>>(
489        packet: &'a P,
490        transport_packet_data: &TransportPacketData,
491    ) -> Self {
492        let protocol = I::map_ip(packet.protocol(), |proto| proto.into(), |proto| proto.into());
493
494        let (src_port, dst_port) = match transport_packet_data {
495            TransportPacketData::Tcp { src_port, dst_port, .. }
496            | TransportPacketData::Generic { src_port, dst_port } => (*src_port, *dst_port),
497        };
498
499        Self {
500            protocol: protocol,
501            src_addr: packet.src_addr(),
502            dst_addr: packet.dst_addr(),
503            src_port_or_id: src_port,
504            dst_port_or_id: dst_port,
505        }
506    }
507
508    /// Returns the inverted version of the tuple.
509    ///
510    /// This means the src and dst addresses are swapped. For TCP and UDP, the
511    /// ports are reversed, but for ICMP, where the ports stand in for other
512    /// information, things are more complicated.
513    pub(crate) fn invert(self) -> Tuple<I> {
514        // TODO(https://fxbug.dev/328064082): Support tracking different ICMP
515        // request/response types.
516        Self {
517            protocol: self.protocol,
518            src_addr: self.dst_addr,
519            dst_addr: self.src_addr,
520            src_port_or_id: self.dst_port_or_id,
521            dst_port_or_id: self.src_port_or_id,
522        }
523    }
524}
525
526impl<I: IpExt> Inspectable for Tuple<I> {
527    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
528        inspector.record_debug("protocol", self.protocol);
529        inspector.record_ip_addr("src_addr", self.src_addr);
530        inspector.record_ip_addr("dst_addr", self.dst_addr);
531        inspector.record_usize("src_port_or_id", self.src_port_or_id);
532        inspector.record_usize("dst_port_or_id", self.dst_port_or_id);
533    }
534}
535
/// The direction of a packet when compared to a given connection.
///
/// Determined by comparing the packet's tuple against the connection's
/// original and reply tuples; see [`Connection::direction`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ConnectionDirection {
    /// The packet is traveling in the same direction as the first packet seen
    /// for the [`Connection`].
    Original,

    /// The packet is traveling in the opposite direction from the first packet
    /// seen for the [`Connection`].
    ///
    /// Packets of a self-connected flow (whose original and reply tuples are
    /// equal) are always reported with this direction.
    Reply,
}
547
/// An error returned from [`Table::finalize_connection`].
#[derive(Debug)]
pub(crate) enum FinalizeConnectionError {
    /// There is a conflicting connection already tracked by conntrack. The
    /// to-be-finalized connection was not inserted into the table.
    ///
    /// A tracked connection under one of the same tuples only counts as a
    /// conflict if it is not compatible with the one being finalized.
    Conflict,

    /// The table has reached the hard size cap and no room could be made.
    ///
    /// Room can only be made by evicting a connection that is expired or not
    /// yet established.
    TableFull,
}
558
/// Type to track additional processing required after updating a connection.
#[derive(Debug, PartialEq, Eq)]
enum ConnectionUpdateAction {
    /// Processing completed and no further action necessary.
    NoAction,

    /// The entry for this connection should be removed from the conntrack table.
    ///
    /// For a connection that has not been inserted yet, this means it must not
    /// be inserted at all.
    RemoveEntry,
}
568
/// An error returned from [`Connection::update`].
///
/// [`Table::get_connection_for_packet_and_update`] surfaces this to its caller
/// as [`GetConnectionError::InvalidPacket`].
#[derive(Debug, PartialEq, Eq)]
enum ConnectionUpdateError {
    /// The packet was invalid. The caller may decide whether to drop this
    /// packet or not.
    InvalidPacket,
}
576
/// An error returned from [`Table::get_connection_for_packet_and_update`].
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub(crate) enum GetConnectionError<I: IpExt, E, BT: FilterBindingsTypes> {
    /// The packet was invalid. The caller may decide whether to drop it or not.
    ///
    /// Carries the connection the packet was matched against and the direction
    /// of the packet relative to that connection.
    InvalidPacket(Connection<I, E, BT>, ConnectionDirection),
}
584
/// A `Connection` contains all of the information about a single connection
/// tracked by conntrack.
///
/// A connection starts out as [`Connection::Exclusive`] when no table entry
/// exists for a packet's tuple, and is handed out as [`Connection::Shared`]
/// once it is present in the table.
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub enum Connection<I: IpExt, E, BT: FilterBindingsTypes> {
    /// A connection that is directly owned by the packet that originated the
    /// connection and no others. All fields are modifiable.
    Exclusive(ConnectionExclusive<I, E, BT>),

    /// This is an existing connection, and there are possibly many other
    /// packets that are concurrently modifying it.
    Shared(Arc<ConnectionShared<I, E, BT>>),
}
598
/// An error when attempting to retrieve the underlying conntrack entry from a
/// weak handle to it.
///
/// Returned by [`WeakConnection::into_inner`].
#[derive(Debug)]
pub enum WeakConnectionError {
    /// The entry was removed from the table after the weak handle was created.
    EntryRemoved,
    /// The entry does not match the type that is expected (due to an IP version
    /// mismatch, for example).
    InvalidEntry,
}
609
/// A type-erased weak handle to a connection tracking entry.
///
/// We use type erasure here to get rid of the parameterization on IP version;
/// this handle is meant to be able to transit the device layer and at that
/// point things are not parameterized on IP version (IPv4 and IPv6 packets both
/// end up in the same device queue, for example).
///
/// When this is received for incoming packets, [`WeakConnection::into_inner`]
/// can be used to downcast to the expected concrete [`Connection`] type.
///
/// Being a weak handle, it does not keep the entry alive on its own.
#[derive(Debug, Clone)]
pub struct WeakConnection(pub(crate) Weak<dyn Any + Send + Sync>);
621
622impl WeakConnection {
623    /// Creates a new type-erased weak handle to the provided conntrack entry,
624    /// provided it is a shared entry.
625    pub fn new<I: IpExt, BT: FilterBindingsTypes + 'static, E: Send + Sync + 'static>(
626        conn: &Connection<I, E, BT>,
627    ) -> Option<Self> {
628        let shared = match conn {
629            Connection::Exclusive(_) => return None,
630            Connection::Shared(shared) => shared,
631        };
632        let weak = Arc::downgrade(shared);
633        Some(Self(weak))
634    }
635
636    /// Attempts to upgrade the provided weak handle to the conntrack entry and
637    /// downcast it to the specified concrete [`Connection`] type.
638    ///
639    /// Fails if either the weak handle cannot be upgraded (because the conntrack
640    /// entry has since been removed), or the type-erased handle cannot be downcast
641    /// to the concrete type (because the packet was modified after the creation of
642    /// this handle such that it no longer matches, e.g. the IP version of the
643    /// connection).
644    pub fn into_inner<I: IpExt, BT: FilterBindingsTypes + 'static, E: Send + Sync + 'static>(
645        self,
646    ) -> Result<Connection<I, E, BT>, WeakConnectionError> {
647        let Self(inner) = self;
648        let shared = inner
649            .upgrade()
650            .ok_or(WeakConnectionError::EntryRemoved)?
651            .downcast()
652            .map_err(|_err: Arc<_>| WeakConnectionError::InvalidEntry)?;
653        Ok(Connection::Shared(shared))
654    }
655}
656
657impl<I: IpExt, E, BT: FilterBindingsTypes> Connection<I, E, BT> {
658    /// Returns the tuple of the original direction of this connection.
659    pub fn original_tuple(&self) -> &Tuple<I> {
660        match self {
661            Connection::Exclusive(c) => &c.inner.original_tuple,
662            Connection::Shared(c) => &c.inner.original_tuple,
663        }
664    }
665
666    /// Returns the tuple of the reply direction of this connection.
667    pub(crate) fn reply_tuple(&self) -> &Tuple<I> {
668        match self {
669            Connection::Exclusive(c) => &c.inner.reply_tuple,
670            Connection::Shared(c) => &c.inner.reply_tuple,
671        }
672    }
673
674    /// Returns a reference to the [`Connection::external_data`] field.
675    pub fn external_data(&self) -> &E {
676        match self {
677            Connection::Exclusive(c) => &c.inner.external_data,
678            Connection::Shared(c) => &c.inner.external_data,
679        }
680    }
681
682    /// Returns the direction the tuple represents with respect to the
683    /// connection.
684    pub(crate) fn direction(&self, tuple: &Tuple<I>) -> Option<ConnectionDirection> {
685        let (original, reply) = match self {
686            Connection::Exclusive(c) => (&c.inner.original_tuple, &c.inner.reply_tuple),
687            Connection::Shared(c) => (&c.inner.original_tuple, &c.inner.reply_tuple),
688        };
689
690        // The ordering here is sadly mildly load-bearing. For self-connected
691        // sockets, the first comparison will be true, so having the original
692        // tuple first would mean that the connection is never marked
693        // established.
694        //
695        // This ordering means that all self-connected connections will be
696        // marked as established immediately upon receiving the first packet.
697        if tuple == reply {
698            Some(ConnectionDirection::Reply)
699        } else if tuple == original {
700            Some(ConnectionDirection::Original)
701        } else {
702            None
703        }
704    }
705
706    /// Returns a copy of the internal connection state
707    #[allow(dead_code)]
708    pub(crate) fn state(&self) -> ConnectionState<BT> {
709        match self {
710            Connection::Exclusive(c) => c.state.clone(),
711            Connection::Shared(c) => c.state.lock().clone(),
712        }
713    }
714}
715
716impl<I: IpExt, E, BC: FilterBindingsContext> Connection<I, E, BC> {
717    fn update(
718        &mut self,
719        bindings_ctx: &BC,
720        packet: &PacketMetadata<I>,
721        direction: ConnectionDirection,
722    ) -> Result<ConnectionUpdateAction, ConnectionUpdateError> {
723        let now = bindings_ctx.now();
724
725        match self {
726            Connection::Exclusive(c) => c.state.update(direction, &packet.transport_data, now),
727            Connection::Shared(c) => c.state.lock().update(direction, &packet.transport_data, now),
728        }
729    }
730}
731
/// Fields common to both [`ConnectionExclusive`] and [`ConnectionShared`].
///
/// Holds the two tuples that identify the connection plus the caller-supplied
/// external data. `Debug` and `PartialEq` are only available when `E`
/// implements the corresponding trait.
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"), PartialEq(bound = "E: PartialEq"))]
pub struct ConnectionCommon<I: IpExt, E> {
    /// The 5-tuple for the connection in the original direction. This is
    /// arbitrary, and is just the direction where a packet was first seen.
    pub(crate) original_tuple: Tuple<I>,

    /// The 5-tuple for the connection in the reply direction. This is what's
    /// used for packet rewriting for NAT.
    pub(crate) reply_tuple: Tuple<I>,

    /// Extra information that is not needed by the conntrack module itself. In
    /// the case of NAT, we expect this to contain things such as the kind of
    /// rewriting that will occur (e.g. SNAT vs DNAT).
    pub(crate) external_data: E,
}
749
750impl<I: IpExt, E: Inspectable> Inspectable for ConnectionCommon<I, E> {
751    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
752        inspector.record_child("original_tuple", |inspector| {
753            inspector.delegate_inspectable(&self.original_tuple);
754        });
755
756        inspector.record_child("reply_tuple", |inspector| {
757            inspector.delegate_inspectable(&self.reply_tuple);
758        });
759
760        // We record external_data as an inspectable because that allows us to
761        // prevent accidentally leaking data, which could happen if we just used
762        // the Debug impl.
763        inspector.record_child("external_data", |inspector| {
764            inspector.delegate_inspectable(&self.external_data);
765        });
766    }
767}
768
/// Protocol-specific tracking state attached to a connection.
#[derive(Debug, Clone)]
enum ProtocolState {
    /// TCP-specific tracking state, maintained by the `tcp` submodule.
    Tcp(tcp::Connection),
    /// UDP; no per-connection data is stored in this variant.
    Udp,
    /// Every other protocol, including ICMP; no per-connection data is stored
    /// in this variant.
    Other,
}
775
776impl ProtocolState {
777    fn update(
778        &mut self,
779        dir: ConnectionDirection,
780        transport_data: &TransportPacketData,
781    ) -> Result<ConnectionUpdateAction, ConnectionUpdateError> {
782        match self {
783            ProtocolState::Tcp(tcp_conn) => {
784                let (segment, payload_len) = assert_matches!(
785                    transport_data,
786                    TransportPacketData::Tcp { segment, payload_len, .. } => (segment, payload_len)
787                );
788                tcp_conn.update(&segment, *payload_len, dir)
789            }
790            ProtocolState::Udp | ProtocolState::Other => Ok(ConnectionUpdateAction::NoAction),
791        }
792    }
793}
794
/// The lifecycle of the connection getting to being established.
///
/// To mimic Linux behavior, we require seeing three packets in order to mark a
/// connection established.
/// 1. Original
/// 2. Reply
/// 3. Original
///
/// The first packet is implicit in the creation of the connection when the
/// first packet is seen.
///
/// The variants are declared in lifecycle order, which is also the order used
/// by the derived `PartialOrd`/`Ord` implementations.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum EstablishmentLifecycle {
    /// Only original-direction packets have been seen so far. Newly created
    /// connections start here.
    SeenOriginal,
    /// A reply-direction packet has been seen, but no original-direction
    /// packet has followed it yet.
    SeenReply,
    /// An original-direction packet was seen after a reply. Further packets do
    /// not change this state.
    Established,
}
811
812impl EstablishmentLifecycle {
813    fn update(self, dir: ConnectionDirection) -> Self {
814        match self {
815            EstablishmentLifecycle::SeenOriginal => match dir {
816                ConnectionDirection::Original => self,
817                ConnectionDirection::Reply => EstablishmentLifecycle::SeenReply,
818            },
819            EstablishmentLifecycle::SeenReply => match dir {
820                ConnectionDirection::Original => EstablishmentLifecycle::Established,
821                ConnectionDirection::Reply => self,
822            },
823            EstablishmentLifecycle::Established => self,
824        }
825    }
826}
827
/// Dynamic per-connection state.
///
/// This is the part of a connection that changes as packets are observed. The
/// `Clone` and `Debug` implementations are derived without placing any extra
/// bounds on `BT`.
#[derive(Derivative)]
#[derivative(Clone(bound = ""), Debug(bound = ""))]
pub(crate) struct ConnectionState<BT: FilterBindingsTypes> {
    /// The time the last packet was seen for this connection (in either of the
    /// original or reply directions).
    last_packet_time: BT::Instant,

    /// Where in the generic establishment lifecycle the current connection is.
    establishment_lifecycle: EstablishmentLifecycle,

    /// State that is specific to a given protocol (e.g. TCP or UDP).
    protocol_state: ProtocolState,
}
842
843impl<BT: FilterBindingsTypes> ConnectionState<BT> {
844    fn update(
845        &mut self,
846        dir: ConnectionDirection,
847        transport_data: &TransportPacketData,
848        now: BT::Instant,
849    ) -> Result<ConnectionUpdateAction, ConnectionUpdateError> {
850        if self.last_packet_time < now {
851            self.last_packet_time = now;
852        }
853
854        self.establishment_lifecycle = self.establishment_lifecycle.update(dir);
855
856        self.protocol_state.update(dir, transport_data)
857    }
858
859    fn is_expired(&self, now: BT::Instant) -> bool {
860        let duration = now.saturating_duration_since(self.last_packet_time);
861
862        let expiry_duration = match &self.protocol_state {
863            ProtocolState::Tcp(tcp_conn) => tcp_conn.expiry_duration(self.establishment_lifecycle),
864            ProtocolState::Udp => CONNECTION_EXPIRY_TIME_UDP,
865            // ICMP ends up here. The ICMP messages we track are simple
866            // request/response protocols, so we always expect to get a response
867            // quickly (within 2 RTT). Any followup messages (e.g. if making
868            // periodic ECHO requests) should reuse this existing connection.
869            ProtocolState::Other => CONNECTION_EXPIRY_OTHER,
870        };
871
872        duration >= expiry_duration
873    }
874}
875
876impl<BT: FilterBindingsTypes> Inspectable for ConnectionState<BT> {
877    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
878        inspector.record_bool(
879            "established",
880            match self.establishment_lifecycle {
881                EstablishmentLifecycle::SeenOriginal | EstablishmentLifecycle::SeenReply => false,
882                EstablishmentLifecycle::Established => true,
883            },
884        );
885        inspector.record_inspectable_value("last_packet_time", &self.last_packet_time);
886    }
887}
888
/// A conntrack connection with single ownership.
///
/// Because of this, many fields may be updated without synchronization. There
/// is no chance of messing with other packets for this connection or ending up
/// out-of-sync with the table (e.g. by changing the tuples once the connection
/// has been inserted).
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub struct ConnectionExclusive<I: IpExt, E, BT: FilterBindingsTypes> {
    /// The tuples and external data of the connection.
    pub(crate) inner: ConnectionCommon<I, E>,
    /// The dynamic state of the connection, stored without a lock.
    pub(crate) state: ConnectionState<BT>,

    /// When true, do not insert the connection into the conntrack table.
    ///
    /// This allows the stack to still operate against the connection (e.g. for
    /// NAT), while guaranteeing that it won't make it into the table.
    do_not_insert: bool,
}
907
908impl<I: IpExt, E, BT: FilterBindingsTypes> ConnectionExclusive<I, E, BT> {
909    /// Turn this exclusive connection into a shared one. This is required in
910    /// order to insert into the [`Table`] table.
911    fn make_shared(self) -> Arc<ConnectionShared<I, E, BT>> {
912        Arc::new(ConnectionShared { inner: self.inner, state: Mutex::new(self.state) })
913    }
914
915    pub(crate) fn reply_tuple(&self) -> &Tuple<I> {
916        &self.inner.reply_tuple
917    }
918
919    pub(crate) fn rewrite_reply_dst_addr(&mut self, addr: I::Addr) {
920        self.inner.reply_tuple.dst_addr = addr;
921    }
922
923    pub(crate) fn rewrite_reply_src_addr(&mut self, addr: I::Addr) {
924        self.inner.reply_tuple.src_addr = addr;
925    }
926
927    pub(crate) fn rewrite_reply_src_port_or_id(&mut self, port_or_id: u16) {
928        self.inner.reply_tuple.src_port_or_id = port_or_id;
929        match self.inner.reply_tuple.protocol {
930            TransportProtocol::Icmp => {
931                // ICMP uses a single ID and conntrack keeps track of it in both
932                // ID fields. This makes it easier to keep a single logic to
933                // flip the direction. Hence we need to update the rest of the
934                // tuple.
935                //
936                // TODO(https://fxbug.dev/328064082): Probably needs revisiting
937                // as part of better support for ICMP request/response.
938                self.inner.reply_tuple.dst_port_or_id = port_or_id;
939            }
940            TransportProtocol::Tcp | TransportProtocol::Udp | TransportProtocol::Other(_) => {}
941        }
942    }
943
944    pub(crate) fn rewrite_reply_dst_port_or_id(&mut self, port_or_id: u16) {
945        self.inner.reply_tuple.dst_port_or_id = port_or_id;
946        match self.inner.reply_tuple.protocol {
947            TransportProtocol::Icmp => {
948                // ICMP uses a single ID and conntrack keeps track of it in both
949                // ID fields. This makes it easier to keep a single logic to
950                // flip the direction. Hence we need to update the rest of the
951                // tuple.
952                //
953                // TODO(https://fxbug.dev/328064082): Probably needs revisiting
954                // as part of better support for ICMP request/response.
955                self.inner.reply_tuple.src_port_or_id = port_or_id;
956            }
957            TransportProtocol::Tcp | TransportProtocol::Udp | TransportProtocol::Other(_) => {}
958        }
959    }
960}
961
962impl<I: IpExt, E: Default, BC: FilterBindingsContext> ConnectionExclusive<I, E, BC> {
963    pub(crate) fn from_deconstructed_packet(
964        bindings_ctx: &BC,
965        PacketMetadata { tuple, transport_data }: &PacketMetadata<I>,
966    ) -> Option<Self> {
967        let reply_tuple = tuple.clone().invert();
968        let self_connected = reply_tuple == *tuple;
969
970        Some(Self {
971            inner: ConnectionCommon {
972                original_tuple: tuple.clone(),
973                reply_tuple,
974                external_data: E::default(),
975            },
976            state: ConnectionState {
977                last_packet_time: bindings_ctx.now(),
978                establishment_lifecycle: EstablishmentLifecycle::SeenOriginal,
979                protocol_state: match tuple.protocol {
980                    TransportProtocol::Tcp => {
981                        let (segment, payload_len) = transport_data
982                            .tcp_segment_and_len()
983                            .expect("protocol was TCP, so transport data should have TCP info");
984
985                        ProtocolState::Tcp(tcp::Connection::new(
986                            segment,
987                            payload_len,
988                            self_connected,
989                        )?)
990                    }
991                    TransportProtocol::Udp => ProtocolState::Udp,
992                    TransportProtocol::Icmp | TransportProtocol::Other(_) => ProtocolState::Other,
993                },
994            },
995            do_not_insert: false,
996        })
997    }
998}
999
/// A conntrack connection with shared ownership.
///
/// All fields are private, because other packets, and the conntrack table
/// itself, will be depending on them not to change. Fields must be accessed
/// through the associated methods.
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub struct ConnectionShared<I: IpExt, E, BT: FilterBindingsTypes> {
    /// The tuples and external data of the connection.
    inner: ConnectionCommon<I, E>,
    /// The dynamic state of the connection, guarded by a lock.
    state: Mutex<ConnectionState<BT>>,
}
1011
/// The IP-agnostic transport protocol of a packet.
#[allow(missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, GenericOverIp)]
#[generic_over_ip()]
pub enum TransportProtocol {
    // TCP.
    Tcp,
    // UDP.
    Udp,
    // ICMP; both the IPv4 and the IPv6 ICMP protocol values convert to this.
    Icmp,
    // Any other protocol, carrying the `u8` obtained by converting the
    // original protocol value.
    Other(u8),
}
1022
1023impl From<Ipv4Proto> for TransportProtocol {
1024    fn from(value: Ipv4Proto) -> Self {
1025        match value {
1026            Ipv4Proto::Proto(IpProto::Tcp) => TransportProtocol::Tcp,
1027            Ipv4Proto::Proto(IpProto::Udp) => TransportProtocol::Udp,
1028            Ipv4Proto::Icmp => TransportProtocol::Icmp,
1029            v => TransportProtocol::Other(v.into()),
1030        }
1031    }
1032}
1033
1034impl From<Ipv6Proto> for TransportProtocol {
1035    fn from(value: Ipv6Proto) -> Self {
1036        match value {
1037            Ipv6Proto::Proto(IpProto::Tcp) => TransportProtocol::Tcp,
1038            Ipv6Proto::Proto(IpProto::Udp) => TransportProtocol::Udp,
1039            Ipv6Proto::Icmpv6 => TransportProtocol::Icmp,
1040            v => TransportProtocol::Other(v.into()),
1041        }
1042    }
1043}
1044
1045impl From<IpProto> for TransportProtocol {
1046    fn from(value: IpProto) -> Self {
1047        match value {
1048            IpProto::Tcp => TransportProtocol::Tcp,
1049            IpProto::Udp => TransportProtocol::Udp,
1050            v @ IpProto::Reserved => TransportProtocol::Other(v.into()),
1051        }
1052    }
1053}
1054
1055impl Display for TransportProtocol {
1056    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1057        match self {
1058            TransportProtocol::Tcp => write!(f, "TCP"),
1059            TransportProtocol::Udp => write!(f, "UDP"),
1060            TransportProtocol::Icmp => write!(f, "ICMP"),
1061            TransportProtocol::Other(n) => write!(f, "Other({n})"),
1062        }
1063    }
1064}
1065
1066impl Debug for TransportProtocol {
1067    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1068        Display::fmt(&self, f)
1069    }
1070}
1071
1072impl<I: IpExt, E, BT: FilterBindingsTypes> ConnectionShared<I, E, BT> {
1073    fn is_expired(&self, now: BT::Instant) -> bool {
1074        self.state.lock().is_expired(now)
1075    }
1076}
1077
1078impl<I: IpExt, E: CompatibleWith, BT: FilterBindingsTypes> ConnectionShared<I, E, BT> {
1079    /// Returns whether the provided exclusive connection is compatible with this
1080    /// one, to the extent that a shared reference to this tracked connection could
1081    /// be adopted in place of the exclusive connection.
1082    pub(crate) fn compatible_with(&self, conn: &ConnectionExclusive<I, E, BT>) -> bool {
1083        self.inner.original_tuple == conn.inner.original_tuple
1084            && self.inner.reply_tuple == conn.inner.reply_tuple
1085            && self.inner.external_data.compatible_with(&conn.inner.external_data)
1086    }
1087}
1088
1089impl<I: IpExt, E: Inspectable, BT: FilterBindingsTypes> Inspectable for ConnectionShared<I, E, BT> {
1090    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
1091        inspector.delegate_inspectable(&self.inner);
1092        inspector.delegate_inspectable(&*self.state.lock());
1093    }
1094}
1095
/// Allows a caller to check whether a given connection tracking entry (or some
/// configuration owned by that entry) is compatible with another.
///
/// In this file it is required of the external data type so that
/// [`ConnectionShared::compatible_with`] can compare the external data of two
/// connections.
pub trait CompatibleWith {
    /// Returns whether the provided entity is compatible with this entity in the
    /// context of connection tracking.
    fn compatible_with(&self, other: &Self) -> bool;
}
1103
/// The relevant fields extracted from a packet's IP and transport headers,
/// gathered up front so that the incoming [`IpPacket`] only has to be touched
/// once.
///
/// Also acts as a witness type that the tuple and transport data have the same
/// transport protocol.
pub(crate) struct PacketMetadata<I: IpExt> {
    /// The connection tuple derived from the packet.
    tuple: Tuple<I>,
    /// The transport-layer data extracted from the packet.
    transport_data: TransportPacketData,
}
1112
1113impl<I: IpExt> PacketMetadata<I> {
1114    pub(crate) fn new<P: IpPacket<I>>(packet: &P) -> Option<Self> {
1115        let transport_packet_data = packet.maybe_transport_packet().transport_packet_data()?;
1116
1117        let tuple = Tuple::from_packet_and_transport_data(packet, &transport_packet_data);
1118
1119        match tuple.protocol {
1120            TransportProtocol::Tcp => {
1121                assert_matches!(transport_packet_data, TransportPacketData::Tcp { .. })
1122            }
1123            TransportProtocol::Udp | TransportProtocol::Icmp | TransportProtocol::Other(_) => {
1124                assert_matches!(transport_packet_data, TransportPacketData::Generic { .. })
1125            }
1126        }
1127
1128        Some(Self { tuple, transport_data: transport_packet_data })
1129    }
1130}
1131
#[cfg(test)]
pub(crate) mod testutils {
    use crate::packets::testutil::internal::{FakeIpPacket, FakeUdpPacket, TestIpExt};

    /// Creates a pair of UDP packets that are inverses of one another, using
    /// `index` to pick the ports.
    ///
    /// The source port is `index % u16::MAX` and the destination port is
    /// `index / u16::MAX`. That pair is distinct for every `index` below
    /// `u16::MAX * 65536`; for larger values the quotient no longer fits in a
    /// `u16` and is truncated by the cast, so ports can repeat.
    pub(crate) fn make_test_udp_packets<I: TestIpExt>(
        index: u32,
    ) -> (FakeIpPacket<I, FakeUdpPacket>, FakeIpPacket<I, FakeUdpPacket>) {
        let radix = u32::from(u16::MAX);
        let src_port = (index % radix) as u16;
        let dst_port = (index / radix) as u16;

        let forward = FakeIpPacket::<I, _> {
            src_ip: I::SRC_IP,
            dst_ip: I::DST_IP,
            body: FakeUdpPacket { src_port, dst_port },
        };
        // The reply swaps both the addresses and the ports.
        let reverse = FakeIpPacket::<I, _> {
            src_ip: I::DST_IP,
            dst_ip: I::SRC_IP,
            body: FakeUdpPacket { src_port: dst_port, dst_port: src_port },
        };

        (forward, reverse)
    }
}
1160
1161#[cfg(test)]
1162mod tests {
1163    use core::convert::Infallible as Never;
1164
1165    use assert_matches::assert_matches;
1166    use ip_test_macro::ip_test;
1167    use netstack3_base::testutil::FakeTimerCtxExt;
1168    use netstack3_base::{
1169        Control, IntoCoreTimerCtx, Options, SegmentHeader, SeqNum, UnscaledWindowSize,
1170    };
1171    use packet_formats::ip::IpProto;
1172    use test_case::test_case;
1173
1174    use super::testutils::make_test_udp_packets;
1175    use super::*;
1176    use crate::context::testutil::{FakeBindingsCtx, FakeCtx};
1177    use crate::packets::testutil::internal::{
1178        ArbitraryValue, FakeIpPacket, FakeTcpSegment, FakeUdpPacket, TransportPacketExt,
1179    };
1180    use crate::packets::MaybeTransportPacketMut;
1181    use crate::state::IpRoutines;
1182    use crate::testutil::TestIpExt;
1183
    // A fake packet body that exposes no transport-layer data at all.
    struct NoTransportPacket;
1185
    // Always reports that no transport packet data is available.
    impl MaybeTransportPacket for &NoTransportPacket {
        fn transport_packet_data(&self) -> Option<TransportPacketData> {
            None
        }
    }
1191
    // Reports TCP as the protocol even though no transport data is exposed.
    impl<I: IpExt> TransportPacketExt<I> for &NoTransportPacket {
        fn proto() -> I::Proto {
            I::Proto::from(IpProto::Tcp)
        }
    }
1197
    // There is never a mutable transport packet either; the associated type is
    // `Never` (an alias for `Infallible`), so only `None` can be returned.
    impl<I: IpExt> MaybeTransportPacketMut<I> for NoTransportPacket {
        type TransportPacketMut<'a> = Never;

        fn transport_packet_mut(&mut self) -> Option<Self::TransportPacketMut<'_>> {
            None
        }
    }
1205
    // The unit type carries no data, so two values are always compatible. This
    // lets tests use `()` as the external data type.
    impl CompatibleWith for () {
        fn compatible_with(&self, (): &()) -> bool {
            true
        }
    }
1211
    // Exercises every (lifecycle stage, packet direction) combination and
    // checks the stage returned by `EstablishmentLifecycle::update`.
    #[test_case(
        EstablishmentLifecycle::SeenOriginal,
        ConnectionDirection::Original
          => EstablishmentLifecycle::SeenOriginal
    )]
    #[test_case(
        EstablishmentLifecycle::SeenOriginal,
        ConnectionDirection::Reply
          => EstablishmentLifecycle::SeenReply
    )]
    #[test_case(
        EstablishmentLifecycle::SeenReply,
        ConnectionDirection::Original
          => EstablishmentLifecycle::Established
    )]
    #[test_case(
        EstablishmentLifecycle::SeenReply,
        ConnectionDirection::Reply
          => EstablishmentLifecycle::SeenReply
    )]
    #[test_case(
        EstablishmentLifecycle::Established,
        ConnectionDirection::Original
          => EstablishmentLifecycle::Established
    )]
    #[test_case(
        EstablishmentLifecycle::Established,
        ConnectionDirection::Reply
          => EstablishmentLifecycle::Established
    )]
    fn establishment_lifecycle_test(
        lifecycle: EstablishmentLifecycle,
        dir: ConnectionDirection,
    ) -> EstablishmentLifecycle {
        lifecycle.update(dir)
    }
1248
1249    #[ip_test(I)]
1250    #[test_case(TransportProtocol::Udp)]
1251    #[test_case(TransportProtocol::Tcp)]
1252    fn tuple_invert_udp_tcp<I: IpExt + TestIpExt>(protocol: TransportProtocol) {
1253        let orig_tuple = Tuple::<I> {
1254            protocol: protocol,
1255            src_addr: I::SRC_IP,
1256            dst_addr: I::DST_IP,
1257            src_port_or_id: I::SRC_PORT,
1258            dst_port_or_id: I::DST_PORT,
1259        };
1260
1261        let expected = Tuple::<I> {
1262            protocol: protocol,
1263            src_addr: I::DST_IP,
1264            dst_addr: I::SRC_IP,
1265            src_port_or_id: I::DST_PORT,
1266            dst_port_or_id: I::SRC_PORT,
1267        };
1268
1269        let inverted = orig_tuple.invert();
1270
1271        assert_eq!(inverted, expected);
1272    }
1273
1274    #[ip_test(I)]
1275    fn tuple_from_tcp_packet<I: IpExt + TestIpExt>() {
1276        let expected = Tuple::<I> {
1277            protocol: TransportProtocol::Tcp,
1278            src_addr: I::SRC_IP,
1279            dst_addr: I::DST_IP,
1280            src_port_or_id: I::SRC_PORT,
1281            dst_port_or_id: I::DST_PORT,
1282        };
1283
1284        let packet = FakeIpPacket::<I, _> {
1285            src_ip: I::SRC_IP,
1286            dst_ip: I::DST_IP,
1287            body: FakeTcpSegment {
1288                src_port: I::SRC_PORT,
1289                dst_port: I::DST_PORT,
1290                segment: SegmentHeader::arbitrary_value(),
1291                payload_len: 4,
1292            },
1293        };
1294
1295        let tuple = Tuple::from_packet(&packet).expect("valid TCP packet should return a tuple");
1296        assert_eq!(tuple, expected);
1297    }
1298
1299    #[ip_test(I)]
1300    fn tuple_from_packet_no_body<I: IpExt + TestIpExt>() {
1301        let packet = FakeIpPacket::<I, NoTransportPacket> {
1302            src_ip: I::SRC_IP,
1303            dst_ip: I::DST_IP,
1304            body: NoTransportPacket {},
1305        };
1306
1307        let tuple = Tuple::from_packet(&packet);
1308        assert_matches!(tuple, None);
1309    }
1310
1311    #[ip_test(I)]
1312    fn connection_from_tuple<I: IpExt + TestIpExt>() {
1313        let bindings_ctx = FakeBindingsCtx::<I>::new();
1314
1315        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1316            src_ip: I::SRC_IP,
1317            dst_ip: I::DST_IP,
1318            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1319        })
1320        .unwrap();
1321        let original_tuple = packet.tuple.clone();
1322        let reply_tuple = original_tuple.clone().invert();
1323
1324        let connection =
1325            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(&bindings_ctx, &packet)
1326                .unwrap();
1327
1328        assert_eq!(&connection.inner.original_tuple, &original_tuple);
1329        assert_eq!(&connection.inner.reply_tuple, &reply_tuple);
1330    }
1331
1332    #[ip_test(I)]
1333    fn connection_make_shared_has_same_underlying_info<I: IpExt + TestIpExt>() {
1334        let bindings_ctx = FakeBindingsCtx::<I>::new();
1335
1336        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1337            src_ip: I::SRC_IP,
1338            dst_ip: I::DST_IP,
1339            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1340        })
1341        .unwrap();
1342        let original_tuple = packet.tuple.clone();
1343        let reply_tuple = original_tuple.clone().invert();
1344
1345        let mut connection =
1346            ConnectionExclusive::from_deconstructed_packet(&bindings_ctx, &packet).unwrap();
1347        connection.inner.external_data = 1234;
1348        let shared = connection.make_shared();
1349
1350        assert_eq!(shared.inner.original_tuple, original_tuple);
1351        assert_eq!(shared.inner.reply_tuple, reply_tuple);
1352        assert_eq!(shared.inner.external_data, 1234);
1353    }
1354
    // Selects which `Connection` variant a parameterized test should exercise.
    enum ConnectionKind {
        // Wrap the connection as `Connection::Exclusive`.
        Exclusive,
        // Convert the connection with `make_shared` and wrap it as
        // `Connection::Shared`.
        Shared,
    }
1359
1360    #[ip_test(I)]
1361    #[test_case(ConnectionKind::Exclusive)]
1362    #[test_case(ConnectionKind::Shared)]
1363    fn connection_getters<I: IpExt + TestIpExt>(connection_kind: ConnectionKind) {
1364        let bindings_ctx = FakeBindingsCtx::<I>::new();
1365
1366        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1367            src_ip: I::SRC_IP,
1368            dst_ip: I::DST_IP,
1369            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1370        })
1371        .unwrap();
1372        let original_tuple = packet.tuple.clone();
1373        let reply_tuple = original_tuple.clone().invert();
1374
1375        let mut connection =
1376            ConnectionExclusive::from_deconstructed_packet(&bindings_ctx, &packet).unwrap();
1377        connection.inner.external_data = 1234;
1378
1379        let connection = match connection_kind {
1380            ConnectionKind::Exclusive => Connection::Exclusive(connection),
1381            ConnectionKind::Shared => Connection::Shared(connection.make_shared()),
1382        };
1383
1384        assert_eq!(connection.original_tuple(), &original_tuple);
1385        assert_eq!(connection.reply_tuple(), &reply_tuple);
1386        assert_eq!(connection.external_data(), &1234);
1387    }
1388
1389    #[ip_test(I)]
1390    #[test_case(ConnectionKind::Exclusive)]
1391    #[test_case(ConnectionKind::Shared)]
1392    fn connection_direction<I: IpExt + TestIpExt>(connection_kind: ConnectionKind) {
1393        let bindings_ctx = FakeBindingsCtx::<I>::new();
1394
1395        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1396            src_ip: I::SRC_IP,
1397            dst_ip: I::DST_IP,
1398            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1399        })
1400        .unwrap();
1401        let original_tuple = packet.tuple.clone();
1402        let reply_tuple = original_tuple.clone().invert();
1403
1404        let mut other_tuple = original_tuple.clone();
1405        other_tuple.src_port_or_id += 1;
1406
1407        let connection: ConnectionExclusive<_, (), _> =
1408            ConnectionExclusive::from_deconstructed_packet(&bindings_ctx, &packet).unwrap();
1409        let connection = match connection_kind {
1410            ConnectionKind::Exclusive => Connection::Exclusive(connection),
1411            ConnectionKind::Shared => Connection::Shared(connection.make_shared()),
1412        };
1413
1414        assert_matches!(connection.direction(&original_tuple), Some(ConnectionDirection::Original));
1415        assert_matches!(connection.direction(&reply_tuple), Some(ConnectionDirection::Reply));
1416        assert_matches!(connection.direction(&other_tuple), None);
1417    }
1418
    // Checks that `Connection::update` advances both the establishment
    // lifecycle and the last-packet time, for exclusive and shared
    // connections alike.
    #[ip_test(I)]
    #[test_case(ConnectionKind::Exclusive)]
    #[test_case(ConnectionKind::Shared)]
    fn connection_update<I: IpExt + TestIpExt>(connection_kind: ConnectionKind) {
        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
        // Advance the fake clock so that timestamps are distinguishable from
        // the clock's starting value.
        bindings_ctx.sleep(Duration::from_secs(1));

        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
            src_ip: I::SRC_IP,
            dst_ip: I::DST_IP,
            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
        })
        .unwrap();

        let reply_packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
            src_ip: I::DST_IP,
            dst_ip: I::SRC_IP,
            body: FakeUdpPacket { src_port: I::DST_PORT, dst_port: I::SRC_PORT },
        })
        .unwrap();

        let connection =
            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(&bindings_ctx, &packet)
                .unwrap();
        let mut connection = match connection_kind {
            ConnectionKind::Exclusive => Connection::Exclusive(connection),
            ConnectionKind::Shared => Connection::Shared(connection.make_shared()),
        };

        // Another packet in the original direction leaves the lifecycle at
        // `SeenOriginal`.
        assert_matches!(
            connection.update(&bindings_ctx, &packet, ConnectionDirection::Original),
            Ok(ConnectionUpdateAction::NoAction)
        );
        let state = connection.state();
        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenOriginal);
        assert_eq!(state.last_packet_time.offset, Duration::from_secs(1));

        // A packet in the reply direction should advance the lifecycle to
        // `SeenReply` (not yet established) and update the last packet time.
        bindings_ctx.sleep(Duration::from_secs(1));
        assert_matches!(
            connection.update(&bindings_ctx, &reply_packet, ConnectionDirection::Reply),
            Ok(ConnectionUpdateAction::NoAction)
        );
        let state = connection.state();
        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenReply);
        assert_eq!(state.last_packet_time.offset, Duration::from_secs(2));
    }
1467
1468    #[ip_test(I)]
1469    fn table_get_exclusive_connection_and_finalize_shared<I: IpExt + TestIpExt>() {
1470        let mut bindings_ctx = FakeBindingsCtx::new();
1471        bindings_ctx.sleep(Duration::from_secs(1));
1472        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1473
1474        let packet = FakeIpPacket::<I, _> {
1475            src_ip: I::SRC_IP,
1476            dst_ip: I::DST_IP,
1477            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1478        };
1479
1480        let reply_packet = FakeIpPacket::<I, _> {
1481            src_ip: I::DST_IP,
1482            dst_ip: I::SRC_IP,
1483            body: FakeUdpPacket { src_port: I::DST_PORT, dst_port: I::SRC_PORT },
1484        };
1485
1486        let original_tuple = Tuple::from_packet(&packet).expect("packet should be valid");
1487        let reply_tuple = Tuple::from_packet(&reply_packet).expect("packet should be valid");
1488
1489        let (conn, dir) = table
1490            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1491            .expect("packet should be valid")
1492            .expect("connection should be present");
1493        let state = conn.state();
1494        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenOriginal);
1495        assert_eq!(state.last_packet_time.offset, Duration::from_secs(1));
1496
1497        // Since the connection isn't present in the map, we should get a
1498        // freshly-allocated exclusive connection and the map should not have
1499        // been touched.
1500        assert_matches!(conn, Connection::Exclusive(_));
1501        assert_eq!(dir, ConnectionDirection::Original);
1502        assert!(!table.contains_tuple(&original_tuple));
1503        assert!(!table.contains_tuple(&reply_tuple));
1504
1505        // Once we finalize the connection, it should be present in the map.
1506        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((true, Some(_))));
1507        assert!(table.contains_tuple(&original_tuple));
1508        assert!(table.contains_tuple(&reply_tuple));
1509
1510        // We should now get a shared connection back for packets in either
1511        // direction now that the connection is present in the table.
1512        bindings_ctx.sleep(Duration::from_secs(1));
1513        let (conn, dir) = table
1514            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1515            .expect("packet should be valid")
1516            .expect("connection should be present");
1517        assert_eq!(dir, ConnectionDirection::Original);
1518        let state = conn.state();
1519        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenOriginal);
1520        assert_eq!(state.last_packet_time.offset, Duration::from_secs(2));
1521        let conn = assert_matches!(conn, Connection::Shared(conn) => conn);
1522
1523        bindings_ctx.sleep(Duration::from_secs(1));
1524        let (reply_conn, dir) = table
1525            .get_connection_for_packet_and_update(&bindings_ctx, &reply_packet)
1526            .expect("packet should be valid")
1527            .expect("connection should be present");
1528        assert_eq!(dir, ConnectionDirection::Reply);
1529        let state = reply_conn.state();
1530        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenReply);
1531        assert_eq!(state.last_packet_time.offset, Duration::from_secs(3));
1532        let reply_conn = assert_matches!(reply_conn, Connection::Shared(conn) => conn);
1533
1534        // We should be getting the same connection in both directions.
1535        assert!(Arc::ptr_eq(&conn, &reply_conn));
1536
1537        // Inserting the connection a second time shouldn't change the map.
1538        let (conn, _dir) = table
1539            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1540            .expect("packet should be valid")
1541            .unwrap();
1542        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((false, Some(_))));
1543        assert!(table.contains_tuple(&original_tuple));
1544        assert!(table.contains_tuple(&reply_tuple));
1545    }
1546
1547    #[ip_test(I)]
1548    fn table_conflict<I: IpExt + TestIpExt>() {
1549        let mut bindings_ctx = FakeBindingsCtx::new();
1550        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1551
1552        let original_packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1553            src_ip: I::SRC_IP,
1554            dst_ip: I::DST_IP,
1555            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1556        })
1557        .unwrap();
1558
1559        let nated_original_packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1560            src_ip: I::SRC_IP,
1561            dst_ip: I::DST_IP,
1562            body: FakeUdpPacket { src_port: I::SRC_PORT + 1, dst_port: I::DST_PORT + 1 },
1563        })
1564        .unwrap();
1565
1566        let conn1 = Connection::Exclusive(
1567            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1568                &bindings_ctx,
1569                &original_packet,
1570            )
1571            .unwrap(),
1572        );
1573
1574        // Fake NAT that ends up allocating the same reply tuple as an existing
1575        // connection.
1576        let mut conn2 = ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1577            &bindings_ctx,
1578            &original_packet,
1579        )
1580        .unwrap();
1581        conn2.inner.original_tuple = nated_original_packet.tuple.clone();
1582        let conn2 = Connection::Exclusive(conn2);
1583
1584        // Fake NAT that ends up allocating the same original tuple as an
1585        // existing connection.
1586        let mut conn3 = ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1587            &bindings_ctx,
1588            &original_packet,
1589        )
1590        .unwrap();
1591        conn3.inner.reply_tuple = nated_original_packet.tuple.clone().invert();
1592        let conn3 = Connection::Exclusive(conn3);
1593
1594        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn1), Ok((true, Some(_))));
1595        assert_matches!(
1596            table.finalize_connection(&mut bindings_ctx, conn2),
1597            Err(FinalizeConnectionError::Conflict)
1598        );
1599        assert_matches!(
1600            table.finalize_connection(&mut bindings_ctx, conn3),
1601            Err(FinalizeConnectionError::Conflict)
1602        );
1603    }
1604
1605    #[ip_test(I)]
1606    fn table_conflict_identical_connection<
1607        I: IpExt + crate::packets::testutil::internal::TestIpExt,
1608    >() {
1609        let mut bindings_ctx = FakeBindingsCtx::new();
1610        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1611
1612        let original_packet =
1613            PacketMetadata::new(&FakeIpPacket::<I, FakeUdpPacket>::arbitrary_value()).unwrap();
1614
1615        // Simulate a race where two packets in the same flow both end up
1616        // creating identical exclusive connections.
1617
1618        let conn = Connection::Exclusive(
1619            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1620                &bindings_ctx,
1621                &original_packet,
1622            )
1623            .unwrap(),
1624        );
1625        let finalized = assert_matches!(
1626            table.finalize_connection(&mut bindings_ctx, conn),
1627            Ok((true, Some(conn))) => conn
1628        );
1629
1630        let conn = Connection::Exclusive(
1631            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1632                &bindings_ctx,
1633                &original_packet,
1634            )
1635            .unwrap(),
1636        );
1637        let conn = assert_matches!(
1638            table.finalize_connection(&mut bindings_ctx, conn),
1639            Ok((false, Some(conn))) => conn
1640        );
1641        assert!(Arc::ptr_eq(&finalized, &conn));
1642    }
1643
1644    #[derive(Copy, Clone)]
1645    enum GcTrigger {
1646        /// Call [`perform_gc`] function directly, avoiding any timer logic.
1647        Direct,
1648        /// Trigger a timer expiry, which indirectly calls into [`perform_gc`].
1649        Timer,
1650    }
1651
1652    #[ip_test(I)]
1653    #[test_case(GcTrigger::Direct)]
1654    #[test_case(GcTrigger::Timer)]
1655    fn garbage_collection<I: TestIpExt>(gc_trigger: GcTrigger) {
1656        fn perform_gc<I: TestIpExt>(
1657            core_ctx: &mut FakeCtx<I>,
1658            bindings_ctx: &mut FakeBindingsCtx<I>,
1659            gc_trigger: GcTrigger,
1660        ) {
1661            match gc_trigger {
1662                GcTrigger::Direct => core_ctx.conntrack().perform_gc(bindings_ctx),
1663                GcTrigger::Timer => {
1664                    for timer in bindings_ctx
1665                        .trigger_timers_until_instant(bindings_ctx.timer_ctx.instant.time, core_ctx)
1666                    {
1667                        assert_matches!(timer, FilterTimerId::ConntrackGc(_));
1668                    }
1669                }
1670            }
1671        }
1672
1673        let mut bindings_ctx = FakeBindingsCtx::new();
1674        let mut core_ctx = FakeCtx::with_ip_routines(&mut bindings_ctx, IpRoutines::default());
1675
1676        let first_packet = FakeIpPacket::<I, _> {
1677            src_ip: I::SRC_IP,
1678            dst_ip: I::DST_IP,
1679            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1680        };
1681
1682        let second_packet = FakeIpPacket::<I, _> {
1683            src_ip: I::SRC_IP,
1684            dst_ip: I::DST_IP,
1685            body: FakeUdpPacket { src_port: I::SRC_PORT + 1, dst_port: I::DST_PORT },
1686        };
1687        let second_packet_reply = FakeIpPacket::<I, _> {
1688            src_ip: I::DST_IP,
1689            dst_ip: I::SRC_IP,
1690            body: FakeUdpPacket { src_port: I::DST_PORT, dst_port: I::SRC_PORT + 1 },
1691        };
1692
1693        let first_tuple = Tuple::from_packet(&first_packet).expect("packet should be valid");
1694        let first_tuple_reply = first_tuple.clone().invert();
1695        let second_tuple = Tuple::from_packet(&second_packet).expect("packet should be valid");
1696        let second_tuple_reply =
1697            Tuple::from_packet(&second_packet_reply).expect("packet should be valid");
1698
1699        // T=0: Packets for two connections come in.
1700        let (conn, _dir) = core_ctx
1701            .conntrack()
1702            .get_connection_for_packet_and_update(&bindings_ctx, &first_packet)
1703            .expect("packet should be valid")
1704            .expect("packet should be trackable");
1705        assert_matches!(
1706            core_ctx
1707                .conntrack()
1708                .finalize_connection(&mut bindings_ctx, conn)
1709                .expect("connection finalize should succeed"),
1710            (true, Some(_))
1711        );
1712        let (conn, _dir) = core_ctx
1713            .conntrack()
1714            .get_connection_for_packet_and_update(&bindings_ctx, &second_packet)
1715            .expect("packet should be valid")
1716            .expect("packet should be trackable");
1717        assert_matches!(
1718            core_ctx
1719                .conntrack()
1720                .finalize_connection(&mut bindings_ctx, conn)
1721                .expect("connection finalize should succeed"),
1722            (true, Some(_))
1723        );
1724        assert!(core_ctx.conntrack().contains_tuple(&first_tuple));
1725        assert!(core_ctx.conntrack().contains_tuple(&second_tuple));
1726        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1727
1728        // T=GC_INTERVAL: Triggering a GC does not clean up any connections,
1729        // because no connections are stale yet.
1730        bindings_ctx.sleep(GC_INTERVAL);
1731        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1732        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), true);
1733        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), true);
1734        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1735        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1736        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1737
1738        // T=GC_INTERVAL a packet for just the second connection comes in.
1739        let (conn, _dir) = core_ctx
1740            .conntrack()
1741            .get_connection_for_packet_and_update(&bindings_ctx, &second_packet_reply)
1742            .expect("packet should be valid")
1743            .expect("packet should be trackable");
1744        assert_matches!(conn.state().establishment_lifecycle, EstablishmentLifecycle::SeenReply);
1745        assert_matches!(
1746            core_ctx
1747                .conntrack()
1748                .finalize_connection(&mut bindings_ctx, conn)
1749                .expect("connection finalize should succeed"),
1750            (false, Some(_))
1751        );
1752        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), true);
1753        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), true);
1754        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1755        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1756        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1757
1758        // The state in the table at this point is:
1759        // Connection 1:
1760        //   - Last packet seen at T=0
1761        //   - Expires after T=CONNECTION_EXPIRY_TIME_UDP
1762        // Connection 2:
1763        //   - Last packet seen at T=GC_INTERVAL
1764        //   - Expires after CONNECTION_EXPIRY_TIME_UDP + GC_INTERVAL
1765
1766        // T=2*GC_INTERVAL: Triggering a GC does not clean up any connections.
1767        bindings_ctx.sleep(GC_INTERVAL);
1768        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1769        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), true);
1770        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), true);
1771        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1772        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1773        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1774
1775        // Time advances to expiry for the first packet
1776        // (T=CONNECTION_EXPIRY_TIME_UDP) trigger gc and note that the first
1777        // connection was cleaned up
1778        bindings_ctx.sleep(CONNECTION_EXPIRY_TIME_UDP - 2 * GC_INTERVAL);
1779        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1780        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), false);
1781        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), false);
1782        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1783        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1784        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 2);
1785
1786        // Advance time past the expiry time for the second connection
1787        // (T=CONNECTION_EXPIRY_TIME_UDP + GC_INTERVAL) and see that it is
1788        // cleaned up.
1789        bindings_ctx.sleep(GC_INTERVAL);
1790        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1791        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), false);
1792        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), false);
1793        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), false);
1794        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), false);
1795        assert!(core_ctx.conntrack().inner.lock().table.is_empty());
1796    }
1797
1798    fn fill_table<I, E, BC>(
1799        bindings_ctx: &mut BC,
1800        table: &Table<I, E, BC>,
1801        entries: impl Iterator<Item = u32>,
1802        establishment_lifecycle: EstablishmentLifecycle,
1803    ) where
1804        I: IpExt + TestIpExt,
1805        E: Debug + Default + Send + Sync + PartialEq + CompatibleWith + 'static,
1806        BC: FilterBindingsContext,
1807    {
1808        for i in entries {
1809            let (packet, reply_packet) = make_test_udp_packets(i);
1810            let (conn, _dir) = table
1811                .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1812                .expect("packet should be valid")
1813                .expect("packet should be trackable");
1814            assert_matches!(
1815                table
1816                    .finalize_connection(bindings_ctx, conn)
1817                    .expect("connection finalize should succeed"),
1818                (true, Some(_))
1819            );
1820
1821            if establishment_lifecycle >= EstablishmentLifecycle::SeenReply {
1822                let (conn, _dir) = table
1823                    .get_connection_for_packet_and_update(&bindings_ctx, &reply_packet)
1824                    .expect("packet should be valid")
1825                    .expect("packet should be trackable");
1826                assert_matches!(
1827                    table
1828                        .finalize_connection(bindings_ctx, conn)
1829                        .expect("connection finalize should succeed"),
1830                    (false, Some(_))
1831                );
1832
1833                if establishment_lifecycle >= EstablishmentLifecycle::Established {
1834                    let (conn, _dir) = table
1835                        .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1836                        .expect("packet should be valid")
1837                        .expect("packet should be trackable");
1838                    assert_matches!(
1839                        table
1840                            .finalize_connection(bindings_ctx, conn)
1841                            .expect("connection finalize should succeed"),
1842                        (false, Some(_))
1843                    );
1844                }
1845            }
1846        }
1847    }
1848
1849    #[ip_test(I)]
1850    #[test_case(EstablishmentLifecycle::SeenOriginal; "existing connections unestablished")]
1851    #[test_case(EstablishmentLifecycle::SeenReply; "existing connections partially established")]
1852    #[test_case(EstablishmentLifecycle::Established; "existing connections established")]
1853    fn table_size_limit_evict_less_established<I: IpExt + TestIpExt>(
1854        existing_lifecycle: EstablishmentLifecycle,
1855    ) {
1856        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
1857        bindings_ctx.sleep(Duration::from_secs(1));
1858        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1859
1860        fill_table(
1861            &mut bindings_ctx,
1862            &table,
1863            0..(MAXIMUM_ENTRIES / 2).try_into().unwrap(),
1864            existing_lifecycle,
1865        );
1866
1867        // The table should be full whether or not the connections are
1868        // established since finalize_connection always inserts the connection
1869        // under the original and reply tuples.
1870        assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);
1871
1872        let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2).try_into().unwrap());
1873        let (conn, _dir) = table
1874            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1875            .expect("packet should be valid")
1876            .expect("packet should be trackable");
1877        if existing_lifecycle == EstablishmentLifecycle::Established {
1878            // Inserting a new connection should fail because it would grow the
1879            // table.
1880            assert_matches!(
1881                table.finalize_connection(&mut bindings_ctx, conn),
1882                Err(FinalizeConnectionError::TableFull)
1883            );
1884
1885            // Inserting an existing connection again should succeed because
1886            // it's not growing the table.
1887            let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2 - 1).try_into().unwrap());
1888            let (conn, _dir) = table
1889                .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1890                .expect("packet should be valid")
1891                .expect("packet should be trackable");
1892            assert_matches!(
1893                table
1894                    .finalize_connection(&mut bindings_ctx, conn)
1895                    .expect("connection finalize should succeed"),
1896                (false, Some(_))
1897            );
1898        } else {
1899            assert_matches!(
1900                table
1901                    .finalize_connection(&mut bindings_ctx, conn)
1902                    .expect("connection finalize should succeed"),
1903                (true, Some(_))
1904            );
1905        }
1906    }
1907
1908    #[ip_test(I)]
1909    fn table_size_limit_evict_expired<I: IpExt + TestIpExt>() {
1910        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
1911        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1912
1913        // Add one connection that expires a second sooner than the others.
1914        let evicted_tuple = {
1915            let (packet, _) = make_test_udp_packets(0);
1916            Tuple::from_packet(&packet).unwrap()
1917        };
1918        fill_table(&mut bindings_ctx, &table, 0..=0, EstablishmentLifecycle::Established);
1919        bindings_ctx.sleep(Duration::from_secs(1));
1920        fill_table(
1921            &mut bindings_ctx,
1922            &table,
1923            1..(MAXIMUM_ENTRIES / 2).try_into().unwrap(),
1924            EstablishmentLifecycle::Established,
1925        );
1926
1927        assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);
1928        assert!(table.contains_tuple(&evicted_tuple));
1929
1930        let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2).try_into().unwrap());
1931        // The table is full, and no connections can be evicted (they're all
1932        // established and unexpired), so we can't insert a new connection.
1933        let (conn, _dir) = table
1934            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1935            .expect("packet should be valid")
1936            .expect("packet should be trackable");
1937        assert_matches!(
1938            table.finalize_connection(&mut bindings_ctx, conn),
1939            Err(FinalizeConnectionError::TableFull)
1940        );
1941
1942        // Now the first connection can be evicted because it's expired, and we
1943        // see that we're able to insert a new connection.
1944        bindings_ctx.sleep(CONNECTION_EXPIRY_TIME_UDP - Duration::from_secs(1));
1945        let (conn, _dir) = table
1946            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1947            .expect("packet should be valid")
1948            .expect("packet should be trackable");
1949        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok(_));
1950        assert!(!table.contains_tuple(&evicted_tuple));
1951    }
1952
1953    #[ip_test(I)]
1954    fn table_size_limit_less_established<I: IpExt + TestIpExt>() {
1955        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
1956        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1957
1958        let evicted_tuple = {
1959            let (packet, _) = make_test_udp_packets(0);
1960            Tuple::from_packet(&packet).unwrap()
1961        };
1962        // Add one connection that expires a second sooner than the others.
1963        fill_table(&mut bindings_ctx, &table, 0..=0, EstablishmentLifecycle::SeenOriginal);
1964        bindings_ctx.sleep(Duration::from_secs(1));
1965        fill_table(&mut bindings_ctx, &table, 1..=1, EstablishmentLifecycle::SeenOriginal);
1966        fill_table(
1967            &mut bindings_ctx,
1968            &table,
1969            2..(MAXIMUM_ENTRIES / 2).try_into().unwrap(),
1970            EstablishmentLifecycle::SeenReply,
1971        );
1972
1973        assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);
1974        assert!(table.contains_tuple(&evicted_tuple));
1975
1976        // We can insert since all connections in the table are eligible for
1977        // eviction, but we want to be sure that the least established
1978        // connection was the one that's actually evicted.
1979        let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2).try_into().unwrap());
1980        let (conn, _dir) = table
1981            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1982            .expect("packet should be valid")
1983            .expect("packet should be trackable");
1984        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok(_));
1985        assert!(!table.contains_tuple(&evicted_tuple));
1986    }
1987
1988    #[cfg(target_os = "fuchsia")]
1989    #[ip_test(I)]
1990    fn inspect<I: IpExt + TestIpExt>() {
1991        use alloc::boxed::Box;
1992        use alloc::string::ToString;
1993        use diagnostics_assertions::assert_data_tree;
1994        use diagnostics_traits::FuchsiaInspector;
1995        use fuchsia_inspect::Inspector;
1996
1997        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
1998        bindings_ctx.sleep(Duration::from_secs(1));
1999        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
2000
2001        {
2002            let inspector = Inspector::new(Default::default());
2003            let mut bindings_inspector = FuchsiaInspector::<()>::new(inspector.root());
2004            bindings_inspector.delegate_inspectable(&table);
2005
2006            assert_data_tree!(inspector, "root": {
2007                "table_limit_drops": 0u64,
2008                "table_limit_hits": 0u64,
2009                "num_entries": 0u64,
2010                "connections": {},
2011            });
2012        }
2013
2014        // Insert the first connection into the table in an unestablished state.
2015        // This will later be evicted when the table fills up.
2016        let (packet, _) = make_test_udp_packets::<I>(0);
2017        let (conn, _dir) = table
2018            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2019            .expect("packet should be valid")
2020            .expect("packet should be trackable");
2021        assert_matches!(conn.state().establishment_lifecycle, EstablishmentLifecycle::SeenOriginal);
2022        assert_matches!(
2023            table
2024                .finalize_connection(&mut bindings_ctx, conn)
2025                .expect("connection finalize should succeed"),
2026            (true, Some(_))
2027        );
2028
2029        {
2030            let inspector = Inspector::new(Default::default());
2031            let mut bindings_inspector = FuchsiaInspector::<()>::new(inspector.root());
2032            bindings_inspector.delegate_inspectable(&table);
2033
2034            assert_data_tree!(inspector, "root": {
2035                "table_limit_drops": 0u64,
2036                "table_limit_hits": 0u64,
2037                "num_entries": 2u64,
2038                "connections": {
2039                    "0": {
2040                        "original_tuple": {
2041                            "protocol": "UDP",
2042                            "src_addr": I::SRC_IP.to_string(),
2043                            "dst_addr": I::DST_IP.to_string(),
2044                            "src_port_or_id": 0u64,
2045                            "dst_port_or_id": 0u64,
2046                        },
2047                        "reply_tuple": {
2048                            "protocol": "UDP",
2049                            "src_addr": I::DST_IP.to_string(),
2050                            "dst_addr": I::SRC_IP.to_string(),
2051                            "src_port_or_id": 0u64,
2052                            "dst_port_or_id": 0u64,
2053                        },
2054                        "external_data": {},
2055                        "established": false,
2056                        "last_packet_time": 1_000_000_000u64,
2057                    }
2058                },
2059            });
2060        }
2061
2062        // Fill the table up the rest of the way.
2063        fill_table(
2064            &mut bindings_ctx,
2065            &table,
2066            1..(MAXIMUM_ENTRIES / 2).try_into().unwrap(),
2067            EstablishmentLifecycle::Established,
2068        );
2069
2070        assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);
2071
2072        // This first one should succeed because it can evict the
2073        // non-established connection.
2074        let (packet, reply_packet) =
2075            make_test_udp_packets((MAXIMUM_ENTRIES / 2).try_into().unwrap());
2076        let (conn, _dir) = table
2077            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2078            .expect("packet should be valid")
2079            .expect("packet should be trackable");
2080        assert_matches!(
2081            table
2082                .finalize_connection(&mut bindings_ctx, conn)
2083                .expect("connection finalize should succeed"),
2084            (true, Some(_))
2085        );
2086        let (conn, _dir) = table
2087            .get_connection_for_packet_and_update(&bindings_ctx, &reply_packet)
2088            .expect("packet should be valid")
2089            .expect("packet should be trackable");
2090        assert_matches!(
2091            table
2092                .finalize_connection(&mut bindings_ctx, conn)
2093                .expect("connection finalize should succeed"),
2094            (false, Some(_))
2095        );
2096        let (conn, _dir) = table
2097            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2098            .expect("packet should be valid")
2099            .expect("packet should be trackable");
2100        assert_matches!(
2101            table
2102                .finalize_connection(&mut bindings_ctx, conn)
2103                .expect("connection finalize should succeed"),
2104            (false, Some(_))
2105        );
2106
2107        // This next one should fail because there are no connections left to
2108        // evict.
2109        let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2 + 1).try_into().unwrap());
2110        let (conn, _dir) = table
2111            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2112            .expect("packet should be valid")
2113            .expect("packet should be trackable");
2114        assert_matches!(
2115            table.finalize_connection(&mut bindings_ctx, conn),
2116            Err(FinalizeConnectionError::TableFull)
2117        );
2118
2119        {
2120            let inspector = Inspector::new(Default::default());
2121            let mut bindings_inspector = FuchsiaInspector::<()>::new(inspector.root());
2122            bindings_inspector.delegate_inspectable(&table);
2123
2124            assert_data_tree!(inspector, "root": contains {
2125                "table_limit_drops": 1u64,
2126                "table_limit_hits": 2u64,
2127                "num_entries": MAXIMUM_ENTRIES as u64,
2128            });
2129        }
2130    }
2131
2132    #[ip_test(I)]
2133    fn self_connected_socket<I: IpExt + TestIpExt>() {
2134        let mut bindings_ctx = FakeBindingsCtx::new();
2135        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
2136
2137        let packet = FakeIpPacket::<I, _> {
2138            src_ip: I::SRC_IP,
2139            dst_ip: I::SRC_IP,
2140            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::SRC_PORT },
2141        };
2142
2143        let tuple = Tuple::from_packet(&packet).expect("packet should be valid");
2144        let reply_tuple = tuple.clone().invert();
2145
2146        assert_eq!(tuple, reply_tuple);
2147
2148        let (conn, _dir) = table
2149            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2150            .expect("packet should be valid")
2151            .expect("packet should be trackable");
2152        let state = conn.state();
2153        // Since we can't differentiate between the original and reply tuple,
2154        // the connection ends up being marked established immediately.
2155        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenReply);
2156
2157        assert_matches!(conn, Connection::Exclusive(_));
2158        assert!(!table.contains_tuple(&tuple));
2159
2160        // Once we finalize the connection, it should be present in the map.
2161        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((true, Some(_))));
2162        assert!(table.contains_tuple(&tuple));
2163
2164        // There should be a single connection in the table, despite there only
2165        // being a single tuple.
2166        assert_eq!(table.inner.lock().table.len(), 1);
2167
2168        bindings_ctx.sleep(CONNECTION_EXPIRY_TIME_UDP);
2169        table.perform_gc(&mut bindings_ctx);
2170
2171        assert!(table.inner.lock().table.is_empty());
2172    }
2173
2174    #[ip_test(I)]
2175    fn remove_entry_on_update<I: IpExt + TestIpExt>() {
2176        let mut bindings_ctx = FakeBindingsCtx::new();
2177        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
2178
2179        let original_packet = FakeIpPacket::<I, _> {
2180            src_ip: I::SRC_IP,
2181            dst_ip: I::DST_IP,
2182            body: FakeTcpSegment {
2183                src_port: I::SRC_PORT,
2184                dst_port: I::DST_PORT,
2185                segment: SegmentHeader {
2186                    seq: SeqNum::new(1024),
2187                    ack: None,
2188                    wnd: UnscaledWindowSize::from(16u16),
2189                    control: Some(Control::SYN),
2190                    options: Options::default(),
2191                },
2192                payload_len: 0,
2193            },
2194        };
2195
2196        let reply_packet = FakeIpPacket::<I, _> {
2197            src_ip: I::DST_IP,
2198            dst_ip: I::SRC_IP,
2199            body: FakeTcpSegment {
2200                src_port: I::DST_PORT,
2201                dst_port: I::SRC_PORT,
2202                segment: SegmentHeader {
2203                    seq: SeqNum::new(0),
2204                    ack: Some(SeqNum::new(1025)),
2205                    wnd: UnscaledWindowSize::from(16u16),
2206                    control: Some(Control::RST),
2207                    options: Options::default(),
2208                },
2209                payload_len: 0,
2210            },
2211        };
2212
2213        let tuple = Tuple::from_packet(&original_packet).expect("packet should be valid");
2214        let reply_tuple = tuple.clone().invert();
2215
2216        let (conn, _dir) = table
2217            .get_connection_for_packet_and_update(&bindings_ctx, &original_packet)
2218            .expect("packet should be valid")
2219            .expect("packet should be trackable");
2220        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((true, Some(_))));
2221
2222        assert!(table.contains_tuple(&tuple));
2223        assert!(table.contains_tuple(&reply_tuple));
2224
2225        // Sending the reply RST through should result in the connection being
2226        // removed from the table.
2227        let (conn, _dir) = table
2228            .get_connection_for_packet_and_update(&bindings_ctx, &reply_packet)
2229            .expect("packet should be valid")
2230            .expect("packet should be trackable");
2231
2232        assert!(!table.contains_tuple(&tuple));
2233        assert!(!table.contains_tuple(&reply_tuple));
2234        assert!(table.inner.lock().table.is_empty());
2235
2236        // The connection should not added back on finalization.
2237        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((false, Some(_))));
2238
2239        assert!(!table.contains_tuple(&tuple));
2240        assert!(!table.contains_tuple(&reply_tuple));
2241        assert!(table.inner.lock().table.is_empty());
2242
2243        // GC should complete successfully.
2244        bindings_ctx.sleep(Duration::from_secs(60 * 60 * 24 * 6));
2245        table.perform_gc(&mut bindings_ctx);
2246    }
2247
2248    #[ip_test(I)]
2249    fn do_not_insert<I: IpExt + TestIpExt>() {
2250        let mut bindings_ctx = FakeBindingsCtx::new();
2251        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
2252
2253        let packet = FakeIpPacket::<I, _> {
2254            src_ip: I::SRC_IP,
2255            dst_ip: I::DST_IP,
2256            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
2257        };
2258
2259        let tuple = Tuple::from_packet(&packet).expect("packet should be valid");
2260        let reply_tuple = tuple.clone().invert();
2261
2262        let (conn, _dir) = table
2263            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2264            .expect("packet should be valid")
2265            .expect("packet should be trackable");
2266        let mut conn = assert_matches!(conn, Connection::Exclusive(conn) => conn);
2267        conn.do_not_insert = true;
2268        assert_matches!(
2269            table.finalize_connection(&mut bindings_ctx, Connection::Exclusive(conn)),
2270            Ok((false, None))
2271        );
2272
2273        assert!(!table.contains_tuple(&tuple));
2274        assert!(!table.contains_tuple(&reply_tuple));
2275    }
2276}