From 227355ad4e4a6da5435451b3cc7f3ed9091288fa Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 8 Aug 2024 16:06:40 -0700 Subject: [PATCH] tcp: Update window clamping condition [ Upstream commit a2cbb1603943281a604f5adc48079a148db5cb0d ] This patch is based on the discussions between Neal Cardwell and Eric Dumazet in the link https://lore.kernel.org/netdev/20240726204105.1466841-1-quic_subashab@quicinc.com/ It was correctly pointed out that tp->window_clamp would not be updated in cases where net.ipv4.tcp_moderate_rcvbuf=0 or if (copied <= tp->rcvq_space.space). While it is expected for most setups to leave the sysctl enabled, the latter condition may not end up hitting depending on the TCP receive queue size and the pattern of arriving data. The updated check should be hit only on initial MSS update from TCP_MIN_MSS to measured MSS value and subsequently if there was an update to a larger value. Fixes: 05f76b2d634e ("tcp: Adjust clamping window for applications specifying SO_RCVBUF") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d0364cff65c9..24c7c955dc95 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -243,9 +243,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ if (unlikely(len != icsk->icsk_ack.rcv_mss)) { u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; + u8 old_ratio = tcp_sk(sk)->scaling_ratio; do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; + + if (old_ratio != tcp_sk(sk)->scaling_ratio) + WRITE_ONCE(tcp_sk(sk)->window_clamp, + tcp_win_from_space(sk, sk->sk_rcvbuf)); } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); @@ -748,7 +753,8 @@ void tcp_rcv_space_adjust(struct sock *sk) * */ - if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { u64 rcvwin, grow; int rcvbuf; @@ -764,22 +770,12 @@ void tcp_rcv_space_adjust(struct sock *sk) rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin), READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - if (rcvbuf > sk->sk_rcvbuf) { - WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); + if (rcvbuf > sk->sk_rcvbuf) { + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); - /* Make the window clamp follow along. */ - WRITE_ONCE(tp->window_clamp, - tcp_win_from_space(sk, rcvbuf)); - } - } else { - /* Make the window clamp follow along while being bounded - * by SO_RCVBUF. - */ - int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf)); - - if (clamp > tp->window_clamp) - WRITE_ONCE(tp->window_clamp, clamp); + /* Make the window clamp follow along. */ + WRITE_ONCE(tp->window_clamp, + tcp_win_from_space(sk, rcvbuf)); } } tp->rcvq_space.space = copied;