netstack3_tcp/
base.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! The Transmission Control Protocol (TCP).
6
7use core::num::NonZeroU8;
8use core::time::Duration;
9
10use net_types::ip::{GenericOverIp, Ip, Ipv4, Ipv6, Mtu};
11use net_types::SpecifiedAddr;
12use netstack3_base::{
13    IcmpErrorCode, Icmpv4ErrorCode, Icmpv6ErrorCode, IpExt, Marks, Mms, UnscaledWindowSize,
14    WeakDeviceIdentifier, WindowSize,
15};
16use netstack3_ip::socket::{RouteResolutionOptions, SendOptions};
17use packet_formats::icmp::{
18    Icmpv4DestUnreachableCode, Icmpv4TimeExceededCode, Icmpv6DestUnreachableCode,
19};
20use packet_formats::ip::DscpAndEcn;
21use packet_formats::utils::NonZeroDuration;
22use rand::Rng;
23
24use crate::internal::buffer::BufferLimits;
25use crate::internal::counters::{TcpCountersWithSocket, TcpCountersWithoutSocket};
26use crate::internal::socket::isn::IsnGenerator;
27use crate::internal::socket::{DualStackIpExt, Sockets, TcpBindingsTypes};
28use crate::internal::state::DEFAULT_MAX_SYN_RETRIES;
29
/// Default lifetime for an orphaned connection in FIN_WAIT2.
pub const DEFAULT_FIN_WAIT2_TIMEOUT: Duration = Duration::from_secs(60);
32
/// The connection-level errors that TCP reports to the user.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConnectionError {
    /// A RST segment was received while in SYN_SENT state: the connection was
    /// refused.
    ConnectionRefused,
    /// A RST segment reset the connection.
    ConnectionReset,
    /// The network is unreachable, so the connection was closed.
    NetworkUnreachable,
    /// The host is unreachable, so the connection was closed.
    HostUnreachable,
    /// The protocol is unreachable, so the connection was closed.
    ProtocolUnreachable,
    /// The port is unreachable, so the connection was closed.
    PortUnreachable,
    /// The host is down, so the connection was closed.
    DestinationHostDown,
    /// The source route failed, so the connection was closed.
    SourceRouteFailed,
    /// The source host is isolated, so the connection was closed.
    SourceHostIsolated,
    /// A time out occurred, so the connection was closed.
    TimedOut,
    /// Required permissions were lacking, so the connection was closed.
    PermissionDenied,
    /// A protocol error occurred, so the connection was closed.
    ProtocolError,
}
61
62/// The meaning of a particular ICMP error to a TCP socket.
63pub(crate) enum IcmpErrorResult {
64    /// There has been an error on the connection that must be handled.
65    ConnectionError(ConnectionError),
66    /// The PMTU used by the connection has been updated.
67    PmtuUpdate(Mms),
68}
69
70impl IcmpErrorResult {
71    // Notes: the following mappings are guided by the packetimpact test here:
72    // https://cs.opensource.google/gvisor/gvisor/+/master:test/packetimpact/tests/tcp_network_unreachable_test.go;drc=611e6e1247a0691f5fd198f411c68b3bc79d90af
73    pub(crate) fn try_from_icmp_error(err: IcmpErrorCode) -> Option<IcmpErrorResult> {
74        match err {
75            IcmpErrorCode::V4(Icmpv4ErrorCode::DestUnreachable(code, message)) => {
76                match code {
77                    Icmpv4DestUnreachableCode::DestNetworkUnreachable => {
78                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
79                    }
80                    Icmpv4DestUnreachableCode::DestHostUnreachable => {
81                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
82                    }
83                    Icmpv4DestUnreachableCode::DestProtocolUnreachable => {
84                        Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolUnreachable))
85                    }
86                    Icmpv4DestUnreachableCode::DestPortUnreachable => {
87                        Some(IcmpErrorResult::ConnectionError(ConnectionError::PortUnreachable))
88                    }
89                    Icmpv4DestUnreachableCode::SourceRouteFailed => {
90                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceRouteFailed))
91                    }
92                    Icmpv4DestUnreachableCode::DestNetworkUnknown => {
93                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
94                    }
95                    Icmpv4DestUnreachableCode::DestHostUnknown => {
96                        Some(IcmpErrorResult::ConnectionError(ConnectionError::DestinationHostDown))
97                    }
98                    Icmpv4DestUnreachableCode::SourceHostIsolated => {
99                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceHostIsolated))
100                    }
101                    Icmpv4DestUnreachableCode::NetworkAdministrativelyProhibited => {
102                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
103                    }
104                    Icmpv4DestUnreachableCode::HostAdministrativelyProhibited => {
105                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
106                    }
107                    Icmpv4DestUnreachableCode::NetworkUnreachableForToS => {
108                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
109                    }
110                    Icmpv4DestUnreachableCode::HostUnreachableForToS => {
111                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
112                    }
113                    Icmpv4DestUnreachableCode::CommAdministrativelyProhibited => {
114                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
115                    }
116                    Icmpv4DestUnreachableCode::HostPrecedenceViolation => {
117                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
118                    }
119                    Icmpv4DestUnreachableCode::PrecedenceCutoffInEffect => {
120                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
121                    }
122                    Icmpv4DestUnreachableCode::FragmentationRequired => {
123                        let mtu = message.next_hop_mtu().expect("stack should always fill in MTU");
124                        let mtu = Mtu::new(mtu.get().into());
125                        let mms = Mms::from_mtu::<Ipv4>(mtu, 0 /* no IP options used */)?;
126                        Some(IcmpErrorResult::PmtuUpdate(mms))
127                    }
128                }
129            }
130            IcmpErrorCode::V4(Icmpv4ErrorCode::ParameterProblem(_)) => {
131                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
132            }
133            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
134                Icmpv4TimeExceededCode::TtlExpired,
135            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable)),
136            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
137                Icmpv4TimeExceededCode::FragmentReassemblyTimeExceeded,
138            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::TimedOut)),
139            IcmpErrorCode::V4(Icmpv4ErrorCode::Redirect(_)) => None,
140            IcmpErrorCode::V6(Icmpv6ErrorCode::DestUnreachable(code)) => {
141                Some(IcmpErrorResult::ConnectionError(match code {
142                    Icmpv6DestUnreachableCode::NoRoute => ConnectionError::NetworkUnreachable,
143                    Icmpv6DestUnreachableCode::CommAdministrativelyProhibited => {
144                        ConnectionError::PermissionDenied
145                    }
146                    Icmpv6DestUnreachableCode::BeyondScope => ConnectionError::HostUnreachable,
147                    Icmpv6DestUnreachableCode::AddrUnreachable => ConnectionError::HostUnreachable,
148                    Icmpv6DestUnreachableCode::PortUnreachable => ConnectionError::PortUnreachable,
149                    Icmpv6DestUnreachableCode::SrcAddrFailedPolicy => {
150                        ConnectionError::PermissionDenied
151                    }
152                    Icmpv6DestUnreachableCode::RejectRoute => ConnectionError::PermissionDenied,
153                }))
154            }
155            IcmpErrorCode::V6(Icmpv6ErrorCode::ParameterProblem(_)) => {
156                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
157            }
158            IcmpErrorCode::V6(Icmpv6ErrorCode::TimeExceeded(_)) => {
159                Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
160            }
161            IcmpErrorCode::V6(Icmpv6ErrorCode::PacketTooBig(mtu)) => {
162                let mms = Mms::from_mtu::<Ipv6>(mtu, 0 /* no IP options used */)?;
163                Some(IcmpErrorResult::PmtuUpdate(mms))
164            }
165        }
166    }
167}
168
169/// Stack wide state supporting TCP.
170#[derive(GenericOverIp)]
171#[generic_over_ip(I, Ip)]
172pub struct TcpState<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
173    /// The initial sequence number generator.
174    pub isn_generator: IsnGenerator<BT::Instant>,
175    /// TCP sockets state.
176    pub sockets: Sockets<I, D, BT>,
177    /// TCP counters that cannot be attributed to a specific socket.
178    pub counters_without_socket: TcpCountersWithoutSocket<I>,
179    /// TCP counters that can be attributed to a specific socket.
180    pub counters_with_socket: TcpCountersWithSocket<I>,
181}
182
183impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> TcpState<I, D, BT> {
184    /// Creates a new TCP stack state.
185    pub fn new(now: BT::Instant, rng: &mut impl Rng) -> Self {
186        Self {
187            isn_generator: IsnGenerator::new(now, rng),
188            sockets: Sockets::new(),
189            counters_without_socket: Default::default(),
190            counters_with_socket: Default::default(),
191        }
192    }
193}
194
/// A named pair holding the buffer sizes of a socket.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq))]
pub struct BufferSizes {
    /// Size of the send buffer.
    pub send: usize,
    /// Size of the receive buffer.
    pub receive: usize,
}
/// Sensible defaults, available only for testing.
///
/// Both buffers are sized from `WindowSize::DEFAULT`.
#[cfg(any(test, feature = "testutils"))]
impl Default for BufferSizes {
    fn default() -> Self {
        let size: usize = WindowSize::DEFAULT.into();
        BufferSizes { send: size, receive: size }
    }
}
211
212impl BufferSizes {
213    pub(crate) fn rcv_limits(&self) -> BufferLimits {
214        let Self { send: _, receive } = self;
215        BufferLimits { capacity: *receive, len: 0 }
216    }
217
218    pub(crate) fn rwnd(&self) -> WindowSize {
219        let Self { send: _, receive } = *self;
220        WindowSize::new(receive).unwrap_or(WindowSize::MAX)
221    }
222
223    pub(crate) fn rwnd_unscaled(&self) -> UnscaledWindowSize {
224        let Self { send: _, receive } = *self;
225        UnscaledWindowSize::from(u16::try_from(receive).unwrap_or(u16::MAX))
226    }
227}
228
229/// A mutable reference to buffer configuration.
230pub(crate) enum BuffersRefMut<'a, R, S> {
231    /// All buffers are dropped.
232    NoBuffers,
233    /// Buffer sizes are configured but not instantiated yet.
234    Sizes(&'a mut BufferSizes),
235    /// Buffers are instantiated and mutable references are provided.
236    Both { send: &'a mut S, recv: &'a mut R },
237    /// Only the send buffer is still instantiated, which happens in Closing
238    /// states.
239    SendOnly(&'a mut S),
240    /// Only the receive buffer is still instantiated, which happens in Finwait
241    /// states.
242    RecvOnly(&'a mut R),
243}
244
245impl<'a, R, S> BuffersRefMut<'a, R, S> {
246    pub(crate) fn into_send_buffer(self) -> Option<&'a mut S> {
247        match self {
248            Self::NoBuffers | Self::Sizes(_) | Self::RecvOnly(_) => None,
249            Self::Both { send, recv: _ } | Self::SendOnly(send) => Some(send),
250        }
251    }
252
253    pub(crate) fn into_receive_buffer(self) -> Option<&'a mut R> {
254        match self {
255            Self::NoBuffers | Self::Sizes(_) | Self::SendOnly(_) => None,
256            Self::Both { send: _, recv } | Self::RecvOnly(recv) => Some(recv),
257        }
258    }
259}
260
261/// The IP sock options used by TCP.
262#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
263pub struct TcpIpSockOptions {
264    /// Socket marks used for routing.
265    pub marks: Marks,
266}
267
268impl<I: Ip> RouteResolutionOptions<I> for TcpIpSockOptions {
269    fn marks(&self) -> &Marks {
270        &self.marks
271    }
272
273    fn transparent(&self) -> bool {
274        false
275    }
276}
277
278impl<I: IpExt> SendOptions<I> for TcpIpSockOptions {
279    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
280        None
281    }
282
283    fn multicast_loop(&self) -> bool {
284        false
285    }
286
287    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
288        None
289    }
290
291    fn dscp_and_ecn(&self) -> DscpAndEcn {
292        DscpAndEcn::default()
293    }
294
295    fn mtu(&self) -> Mtu {
296        Mtu::no_limit()
297    }
298}
299
300/// TCP socket options.
301///
302/// This only stores options that are trivial to get and set.
303#[derive(Clone, Copy, Debug, PartialEq, Eq)]
304pub struct SocketOptions {
305    /// Socket options that control TCP keep-alive mechanism, see [`KeepAlive`].
306    pub keep_alive: KeepAlive,
307    /// Switch to turn nagle algorithm on/off.
308    pub nagle_enabled: bool,
309    /// The period of time after which the connection should be aborted if no
310    /// ACK is received.
311    pub user_timeout: Option<NonZeroDuration>,
312    /// Switch to turn delayed ACK on/off.
313    pub delayed_ack: bool,
314    /// The period of time after with a dangling FIN_WAIT2 state should be
315    /// reclaimed.
316    pub fin_wait2_timeout: Option<Duration>,
317    /// The maximum SYN retransmissions before aborting a connection.
318    pub max_syn_retries: NonZeroU8,
319    /// Ip socket options.
320    pub ip_options: TcpIpSockOptions,
321}
322
323impl Default for SocketOptions {
324    fn default() -> Self {
325        Self {
326            keep_alive: KeepAlive::default(),
327            // RFC 9293 Section 3.7.4:
328            //   A TCP implementation SHOULD implement the Nagle algorithm to
329            //   coalesce short segments
330            nagle_enabled: true,
331            user_timeout: None,
332            delayed_ack: true,
333            fin_wait2_timeout: Some(DEFAULT_FIN_WAIT2_TIMEOUT),
334            max_syn_retries: DEFAULT_MAX_SYN_RETRIES,
335            ip_options: TcpIpSockOptions::default(),
336        }
337    }
338}
339
340/// Options that are related to TCP keep-alive.
341#[derive(Clone, Copy, Debug, PartialEq, Eq)]
342pub struct KeepAlive {
343    /// The amount of time for an idle connection to wait before sending out
344    /// probes.
345    pub idle: NonZeroDuration,
346    /// Interval between consecutive probes.
347    pub interval: NonZeroDuration,
348    /// Maximum number of probes we send before considering the connection dead.
349    ///
350    /// `u8` is enough because if a connection doesn't hear back from the peer
351    /// after 256 probes, then chances are that the connection is already dead.
352    pub count: NonZeroU8,
353    /// Only send probes if keep-alive is enabled.
354    pub enabled: bool,
355}
356
357impl Default for KeepAlive {
358    fn default() -> Self {
359        // Default values inspired by Linux's TCP implementation:
360        // https://github.com/torvalds/linux/blob/0326074ff4652329f2a1a9c8685104576bd8d131/include/net/tcp.h#L155-L157
361        const DEFAULT_IDLE_DURATION: NonZeroDuration =
362            NonZeroDuration::from_secs(2 * 60 * 60).unwrap();
363        const DEFAULT_INTERVAL: NonZeroDuration = NonZeroDuration::from_secs(75).unwrap();
364        const DEFAULT_COUNT: NonZeroU8 = NonZeroU8::new(9).unwrap();
365
366        Self {
367            idle: DEFAULT_IDLE_DURATION,
368            interval: DEFAULT_INTERVAL,
369            count: DEFAULT_COUNT,
370            // Per RFC 9293(https://datatracker.ietf.org/doc/html/rfc9293#section-3.8.4):
371            //   ... they MUST default to off.
372            enabled: false,
373        }
374    }
375}
376
#[cfg(test)]
pub(crate) mod testutil {
    use core::num::NonZeroU16;

    use netstack3_base::Mss;

    /// The default IPv4 TCP maximum segment size, as a `usize`.
    ///
    /// Per RFC 879 section 1 (https://tools.ietf.org/html/rfc879#section-1):
    ///
    /// THE TCP MAXIMUM SEGMENT SIZE IS THE IP MAXIMUM DATAGRAM SIZE MINUS
    /// FORTY.
    ///   The default IP Maximum Datagram Size is 576.
    ///   The default TCP Maximum Segment Size is 536.
    pub(crate) const DEFAULT_IPV4_MAXIMUM_SEGMENT_SIZE_USIZE: usize = 536;

    /// [`DEFAULT_IPV4_MAXIMUM_SEGMENT_SIZE_USIZE`] expressed as an [`Mss`].
    pub(crate) const DEFAULT_IPV4_MAXIMUM_SEGMENT_SIZE: Mss =
        Mss(NonZeroU16::new(DEFAULT_IPV4_MAXIMUM_SEGMENT_SIZE_USIZE as u16).unwrap());
}