netstack3_tcp/
base.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! The Transmission Control Protocol (TCP).
6
7use core::num::NonZeroU8;
8use core::time::Duration;
9
10use derivative::Derivative;
11use net_types::ip::{GenericOverIp, Ip, Ipv4, Ipv6, Mtu};
12use net_types::SpecifiedAddr;
13use netstack3_base::{
14    IcmpErrorCode, Icmpv4ErrorCode, Icmpv6ErrorCode, IpExt, Marks, Mms, UnscaledWindowSize,
15    WeakDeviceIdentifier, WindowSize,
16};
17use netstack3_ip::socket::{RouteResolutionOptions, SendOptions};
18use packet_formats::icmp::{
19    Icmpv4DestUnreachableCode, Icmpv4TimeExceededCode, Icmpv6DestUnreachableCode,
20};
21use packet_formats::ip::DscpAndEcn;
22use packet_formats::utils::NonZeroDuration;
23use rand::Rng;
24
25use crate::internal::buffer::BufferLimits;
26use crate::internal::counters::{TcpCountersWithSocket, TcpCountersWithoutSocket};
27use crate::internal::socket::isn::IsnGenerator;
28use crate::internal::socket::{DualStackIpExt, Sockets, TcpBindingsTypes, WeakTcpSocketId};
29use crate::internal::state::DEFAULT_MAX_SYN_RETRIES;
30
/// Default lifetime for an orphaned connection in FIN_WAIT2.
///
/// This is the value [`SocketOptions::default`] installs for
/// `fin_wait2_timeout`.
pub const DEFAULT_FIN_WAIT2_TIMEOUT: Duration = Duration::from_secs(60);
33
/// Errors surfaced to the user.
///
/// Each variant names the reason a connection attempt failed or an existing
/// connection was torn down.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConnectionError {
    /// A RST segment was received while in SYN_SENT state, i.e. the connection
    /// was refused.
    ConnectionRefused,
    /// A RST segment reset the connection.
    ConnectionReset,
    /// The network is unreachable, so the connection was closed.
    NetworkUnreachable,
    /// The host is unreachable, so the connection was closed.
    HostUnreachable,
    /// The protocol is unreachable, so the connection was closed.
    ProtocolUnreachable,
    /// The port is unreachable, so the connection was closed.
    PortUnreachable,
    /// The host is down, so the connection was closed.
    DestinationHostDown,
    /// The source route failed, so the connection was closed.
    SourceRouteFailed,
    /// The source host is isolated, so the connection was closed.
    SourceHostIsolated,
    /// The connection timed out and was closed.
    TimedOut,
    /// Required permissions were missing, so the connection was closed.
    PermissionDenied,
    /// A protocol error occurred, so the connection was closed.
    ProtocolError,
}
62
63/// The meaning of a particular ICMP error to a TCP socket.
64pub(crate) enum IcmpErrorResult {
65    /// There has been an error on the connection that must be handled.
66    ConnectionError(ConnectionError),
67    /// The PMTU used by the connection has been updated.
68    PmtuUpdate(Mms),
69}
70
71impl IcmpErrorResult {
72    // Notes: the following mappings are guided by the packetimpact test here:
73    // https://cs.opensource.google/gvisor/gvisor/+/master:test/packetimpact/tests/tcp_network_unreachable_test.go;drc=611e6e1247a0691f5fd198f411c68b3bc79d90af
74    pub(crate) fn try_from_icmp_error(err: IcmpErrorCode) -> Option<IcmpErrorResult> {
75        match err {
76            IcmpErrorCode::V4(Icmpv4ErrorCode::DestUnreachable(code, message)) => {
77                match code {
78                    Icmpv4DestUnreachableCode::DestNetworkUnreachable => {
79                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
80                    }
81                    Icmpv4DestUnreachableCode::DestHostUnreachable => {
82                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
83                    }
84                    Icmpv4DestUnreachableCode::DestProtocolUnreachable => {
85                        Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolUnreachable))
86                    }
87                    Icmpv4DestUnreachableCode::DestPortUnreachable => {
88                        Some(IcmpErrorResult::ConnectionError(ConnectionError::PortUnreachable))
89                    }
90                    Icmpv4DestUnreachableCode::SourceRouteFailed => {
91                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceRouteFailed))
92                    }
93                    Icmpv4DestUnreachableCode::DestNetworkUnknown => {
94                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
95                    }
96                    Icmpv4DestUnreachableCode::DestHostUnknown => {
97                        Some(IcmpErrorResult::ConnectionError(ConnectionError::DestinationHostDown))
98                    }
99                    Icmpv4DestUnreachableCode::SourceHostIsolated => {
100                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceHostIsolated))
101                    }
102                    Icmpv4DestUnreachableCode::NetworkAdministrativelyProhibited => {
103                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
104                    }
105                    Icmpv4DestUnreachableCode::HostAdministrativelyProhibited => {
106                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
107                    }
108                    Icmpv4DestUnreachableCode::NetworkUnreachableForToS => {
109                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
110                    }
111                    Icmpv4DestUnreachableCode::HostUnreachableForToS => {
112                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
113                    }
114                    Icmpv4DestUnreachableCode::CommAdministrativelyProhibited => {
115                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
116                    }
117                    Icmpv4DestUnreachableCode::HostPrecedenceViolation => {
118                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
119                    }
120                    Icmpv4DestUnreachableCode::PrecedenceCutoffInEffect => {
121                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
122                    }
123                    Icmpv4DestUnreachableCode::FragmentationRequired => {
124                        let mtu = message.next_hop_mtu().expect("stack should always fill in MTU");
125                        let mtu = Mtu::new(mtu.get().into());
126                        let mms = Mms::from_mtu::<Ipv4>(mtu, 0 /* no IP options used */)?;
127                        Some(IcmpErrorResult::PmtuUpdate(mms))
128                    }
129                }
130            }
131            IcmpErrorCode::V4(Icmpv4ErrorCode::ParameterProblem(_)) => {
132                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
133            }
134            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
135                Icmpv4TimeExceededCode::TtlExpired,
136            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable)),
137            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
138                Icmpv4TimeExceededCode::FragmentReassemblyTimeExceeded,
139            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::TimedOut)),
140            IcmpErrorCode::V4(Icmpv4ErrorCode::Redirect(_)) => None,
141            IcmpErrorCode::V6(Icmpv6ErrorCode::DestUnreachable(code)) => {
142                Some(IcmpErrorResult::ConnectionError(match code {
143                    Icmpv6DestUnreachableCode::NoRoute => ConnectionError::NetworkUnreachable,
144                    Icmpv6DestUnreachableCode::CommAdministrativelyProhibited => {
145                        ConnectionError::PermissionDenied
146                    }
147                    Icmpv6DestUnreachableCode::BeyondScope => ConnectionError::HostUnreachable,
148                    Icmpv6DestUnreachableCode::AddrUnreachable => ConnectionError::HostUnreachable,
149                    Icmpv6DestUnreachableCode::PortUnreachable => ConnectionError::PortUnreachable,
150                    Icmpv6DestUnreachableCode::SrcAddrFailedPolicy => {
151                        ConnectionError::PermissionDenied
152                    }
153                    Icmpv6DestUnreachableCode::RejectRoute => ConnectionError::PermissionDenied,
154                }))
155            }
156            IcmpErrorCode::V6(Icmpv6ErrorCode::ParameterProblem(_)) => {
157                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
158            }
159            IcmpErrorCode::V6(Icmpv6ErrorCode::TimeExceeded(_)) => {
160                Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
161            }
162            IcmpErrorCode::V6(Icmpv6ErrorCode::PacketTooBig(mtu)) => {
163                let mms = Mms::from_mtu::<Ipv6>(mtu, 0 /* no IP options used */)?;
164                Some(IcmpErrorResult::PmtuUpdate(mms))
165            }
166        }
167    }
168}
169
170/// Metadata associated with an outgoing TCP packet.
171#[derive(Derivative, GenericOverIp)]
172#[generic_over_ip(I, Ip)]
173#[derivative(Debug(bound = ""))]
174#[cfg_attr(any(test, feature = "testutils"), derivative(PartialEq(bound = "")))]
175pub struct TcpSocketTxMetadata<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
176    /// The socket from which the packet originates.
177    socket: WeakTcpSocketId<I, D, BT>,
178}
179
180impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes>
181    TcpSocketTxMetadata<I, D, BT>
182{
183    /// Creates a new `TcpSocketTxMetadata`.
184    pub(crate) fn new(socket: WeakTcpSocketId<I, D, BT>) -> Self {
185        Self { socket }
186    }
187
188    /// Gets the socket from which the packet originates.
189    pub fn socket(&self) -> &WeakTcpSocketId<I, D, BT> {
190        &self.socket
191    }
192}
193
194/// Stack wide state supporting TCP.
195#[derive(GenericOverIp)]
196#[generic_over_ip(I, Ip)]
197pub struct TcpState<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
198    /// The initial sequence number generator.
199    pub isn_generator: IsnGenerator<BT::Instant>,
200    /// TCP sockets state.
201    pub sockets: Sockets<I, D, BT>,
202    /// TCP counters that cannot be attributed to a specific socket.
203    pub counters_without_socket: TcpCountersWithoutSocket<I>,
204    /// TCP counters that can be attributed to a specific socket.
205    pub counters_with_socket: TcpCountersWithSocket<I>,
206}
207
208impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> TcpState<I, D, BT> {
209    /// Creates a new TCP stack state.
210    pub fn new(now: BT::Instant, rng: &mut impl Rng) -> Self {
211        Self {
212            isn_generator: IsnGenerator::new(now, rng),
213            sockets: Sockets::new(),
214            counters_without_socket: Default::default(),
215            counters_with_socket: Default::default(),
216        }
217    }
218}
219
/// Named pair holding the send and receive buffer sizes of a socket.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq))]
pub struct BufferSizes {
    /// Size of the send buffer.
    pub send: usize,
    /// Size of the receive buffer.
    pub receive: usize,
}
/// Sensible defaults, available only for testing.
#[cfg(any(test, feature = "testutils"))]
impl Default for BufferSizes {
    /// Sizes both buffers to `WindowSize::DEFAULT`.
    fn default() -> Self {
        let default_size: usize = WindowSize::DEFAULT.into();
        Self { send: default_size, receive: default_size }
    }
}
236
237impl BufferSizes {
238    pub(crate) fn rcv_limits(&self) -> BufferLimits {
239        let Self { send: _, receive } = self;
240        BufferLimits { capacity: *receive, len: 0 }
241    }
242
243    pub(crate) fn rwnd(&self) -> WindowSize {
244        let Self { send: _, receive } = *self;
245        WindowSize::new(receive).unwrap_or(WindowSize::MAX)
246    }
247
248    pub(crate) fn rwnd_unscaled(&self) -> UnscaledWindowSize {
249        let Self { send: _, receive } = *self;
250        UnscaledWindowSize::from(u16::try_from(receive).unwrap_or(u16::MAX))
251    }
252}
253
254/// A mutable reference to buffer configuration.
255pub(crate) enum BuffersRefMut<'a, R, S> {
256    /// All buffers are dropped.
257    NoBuffers,
258    /// Buffer sizes are configured but not instantiated yet.
259    Sizes(&'a mut BufferSizes),
260    /// Buffers are instantiated and mutable references are provided.
261    Both { send: &'a mut S, recv: &'a mut R },
262    /// Only the send buffer is still instantiated, which happens in Closing
263    /// states.
264    SendOnly(&'a mut S),
265    /// Only the receive buffer is still instantiated, which happens in Finwait
266    /// states.
267    RecvOnly(&'a mut R),
268}
269
270impl<'a, R, S> BuffersRefMut<'a, R, S> {
271    pub(crate) fn into_send_buffer(self) -> Option<&'a mut S> {
272        match self {
273            Self::NoBuffers | Self::Sizes(_) | Self::RecvOnly(_) => None,
274            Self::Both { send, recv: _ } | Self::SendOnly(send) => Some(send),
275        }
276    }
277
278    pub(crate) fn into_receive_buffer(self) -> Option<&'a mut R> {
279        match self {
280            Self::NoBuffers | Self::Sizes(_) | Self::SendOnly(_) => None,
281            Self::Both { send: _, recv } | Self::RecvOnly(recv) => Some(recv),
282        }
283    }
284}
285
286/// The IP sock options used by TCP.
287#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
288pub struct TcpIpSockOptions {
289    /// Socket marks used for routing.
290    pub marks: Marks,
291}
292
293impl<I: Ip> RouteResolutionOptions<I> for TcpIpSockOptions {
294    fn marks(&self) -> &Marks {
295        &self.marks
296    }
297
298    fn transparent(&self) -> bool {
299        false
300    }
301}
302
303impl<I: IpExt> SendOptions<I> for TcpIpSockOptions {
304    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
305        None
306    }
307
308    fn multicast_loop(&self) -> bool {
309        false
310    }
311
312    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
313        None
314    }
315
316    fn dscp_and_ecn(&self) -> DscpAndEcn {
317        DscpAndEcn::default()
318    }
319
320    fn mtu(&self) -> Mtu {
321        Mtu::no_limit()
322    }
323}
324
325/// TCP socket options.
326///
327/// This only stores options that are trivial to get and set.
328#[derive(Clone, Copy, Debug, PartialEq, Eq)]
329pub struct SocketOptions {
330    /// Socket options that control TCP keep-alive mechanism, see [`KeepAlive`].
331    pub keep_alive: KeepAlive,
332    /// Switch to turn nagle algorithm on/off.
333    pub nagle_enabled: bool,
334    /// The period of time after which the connection should be aborted if no
335    /// ACK is received.
336    pub user_timeout: Option<NonZeroDuration>,
337    /// Switch to turn delayed ACK on/off.
338    pub delayed_ack: bool,
339    /// The period of time after with a dangling FIN_WAIT2 state should be
340    /// reclaimed.
341    pub fin_wait2_timeout: Option<Duration>,
342    /// The maximum SYN retransmissions before aborting a connection.
343    pub max_syn_retries: NonZeroU8,
344    /// Ip socket options.
345    pub ip_options: TcpIpSockOptions,
346}
347
348impl Default for SocketOptions {
349    fn default() -> Self {
350        Self {
351            keep_alive: KeepAlive::default(),
352            // RFC 9293 Section 3.7.4:
353            //   A TCP implementation SHOULD implement the Nagle algorithm to
354            //   coalesce short segments
355            nagle_enabled: true,
356            user_timeout: None,
357            delayed_ack: true,
358            fin_wait2_timeout: Some(DEFAULT_FIN_WAIT2_TIMEOUT),
359            max_syn_retries: DEFAULT_MAX_SYN_RETRIES,
360            ip_options: TcpIpSockOptions::default(),
361        }
362    }
363}
364
365/// Options that are related to TCP keep-alive.
366#[derive(Clone, Copy, Debug, PartialEq, Eq)]
367pub struct KeepAlive {
368    /// The amount of time for an idle connection to wait before sending out
369    /// probes.
370    pub idle: NonZeroDuration,
371    /// Interval between consecutive probes.
372    pub interval: NonZeroDuration,
373    /// Maximum number of probes we send before considering the connection dead.
374    ///
375    /// `u8` is enough because if a connection doesn't hear back from the peer
376    /// after 256 probes, then chances are that the connection is already dead.
377    pub count: NonZeroU8,
378    /// Only send probes if keep-alive is enabled.
379    pub enabled: bool,
380}
381
382impl Default for KeepAlive {
383    fn default() -> Self {
384        // Default values inspired by Linux's TCP implementation:
385        // https://github.com/torvalds/linux/blob/0326074ff4652329f2a1a9c8685104576bd8d131/include/net/tcp.h#L155-L157
386        const DEFAULT_IDLE_DURATION: NonZeroDuration =
387            NonZeroDuration::from_secs(2 * 60 * 60).unwrap();
388        const DEFAULT_INTERVAL: NonZeroDuration = NonZeroDuration::from_secs(75).unwrap();
389        const DEFAULT_COUNT: NonZeroU8 = NonZeroU8::new(9).unwrap();
390
391        Self {
392            idle: DEFAULT_IDLE_DURATION,
393            interval: DEFAULT_INTERVAL,
394            count: DEFAULT_COUNT,
395            // Per RFC 9293(https://datatracker.ietf.org/doc/html/rfc9293#section-3.8.4):
396            //   ... they MUST default to off.
397            enabled: false,
398        }
399    }
400}