From b0e44b2726aa1df6ee2ef31891ee60d8147bd31b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Nov 2023 15:24:40 +0000 Subject: [PATCH] Revert "tcp: enforce receive buffer memory limits by allowing the tcp window to shrink" This reverts commit 0796c534242da7bc218ab1eefd6dacc48300302c which is commit b650d953cd391595e536153ce30b4aab385643ac upstream. It breaks the Android ABI, so revert it for now; if it is needed in the future, it can be brought back in an ABI-safe way. Bug: 161946584 Change-Id: I6d4865da903caac56dbca2c7c0cfca14622760d4 Signed-off-by: Greg Kroah-Hartman --- Documentation/networking/ip-sysctl.rst | 15 ------- include/net/netns/ipv4.h | 1 - net/ipv4/sysctl_net_ipv4.c | 9 ---- net/ipv4/tcp_ipv4.c | 2 - net/ipv4/tcp_output.c | 60 ++++---------------------- 5 files changed, 9 insertions(+), 78 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index f3558637a6ea..3301288a7c69 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -967,21 +967,6 @@ tcp_tw_reuse - INTEGER tcp_window_scaling - BOOLEAN Enable window scaling as defined in RFC1323. -tcp_shrink_window - BOOLEAN - This changes how the TCP receive window is calculated. - - RFC 7323, section 2.4, says there are instances when a retracted - window can be offered, and that TCP implementations MUST ensure - that they handle a shrinking window, as specified in RFC 1122. - - - 0 - Disabled. The window is never shrunk. - - 1 - Enabled. The window is shrunk when necessary to remain within - the memory limit set by autotuning (sk_rcvbuf). - This only occurs if a non-zero receive window - scaling factor is also in effect. - - Default: 0 - tcp_wmem - vector of 3 INTEGERs: min, default, max min: Amount of memory reserved for send buffers for TCP sockets. Each TCP socket has rights to use it due to fact of its birth. 
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 04a74d30d009..408c70a83e39 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -65,7 +65,6 @@ struct netns_ipv4 { #endif bool fib_has_custom_local_routes; bool fib_offload_disabled; - u8 sysctl_tcp_shrink_window; #ifdef CONFIG_IP_ROUTE_CLASSID atomic_t fib_num_tclassid_users; #endif diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 73e5821584c1..f68762ce4d8a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1387,15 +1387,6 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - { - .procname = "tcp_shrink_window", - .data = &init_net.ipv4.sysctl_tcp_shrink_window, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = proc_dou8vec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e75603a05f4e..08921b96f972 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3221,8 +3221,6 @@ static int __net_init tcp_sk_init(struct net *net) else net->ipv4.tcp_congestion_control = &tcp_reno; - net->ipv4.sysctl_tcp_shrink_window = 0; - return 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 443b1cab2529..5921b0f6f9f4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -259,8 +259,8 @@ static u16 tcp_select_window(struct sock *sk) u32 old_win = tp->rcv_wnd; u32 cur_win = tcp_receive_window(tp); u32 new_win = __tcp_select_window(sk); - struct net *net = sock_net(sk); + /* Never shrink the offered window */ if (new_win < cur_win) { /* Danger Will Robinson! * Don't update rcv_wup/rcv_wnd here or else @@ -269,14 +269,11 @@ static u16 tcp_select_window(struct sock *sk) * * Relax Will Robinson. 
*/ - if (!READ_ONCE(net->ipv4.sysctl_tcp_shrink_window) || !tp->rx_opt.rcv_wscale) { - /* Never shrink the offered window */ - if (new_win == 0) - NET_INC_STATS(net, LINUX_MIB_TCPWANTZEROWINDOWADV); - new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); - } + if (new_win == 0) + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPWANTZEROWINDOWADV); + new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); } - tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; @@ -284,7 +281,7 @@ static u16 tcp_select_window(struct sock *sk) * scaled window. */ if (!tp->rx_opt.rcv_wscale && - READ_ONCE(net->ipv4.sysctl_tcp_workaround_signed_windows)) + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); @@ -296,9 +293,10 @@ static u16 tcp_select_window(struct sock *sk) if (new_win == 0) { tp->pred_flags = 0; if (old_win) - NET_INC_STATS(net, LINUX_MIB_TCPTOZEROWINDOWADV); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPTOZEROWINDOWADV); } else if (old_win == 0) { - NET_INC_STATS(net, LINUX_MIB_TCPFROMZEROWINDOWADV); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV); } return new_win; @@ -2951,7 +2949,6 @@ u32 __tcp_select_window(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct net *net = sock_net(sk); /* MSS for the peer's data. Previous versions used mss_clamp * here. I don't know if the value based on our guesses * of peer's MSS is better for the performance. It's more correct @@ -2973,15 +2970,6 @@ u32 __tcp_select_window(struct sock *sk) if (mss <= 0) return 0; } - - /* Only allow window shrink if the sysctl is enabled and we have - * a non-zero scaling factor in effect. 
- */ - if (READ_ONCE(net->ipv4.sysctl_tcp_shrink_window) && tp->rx_opt.rcv_wscale) - goto shrink_window_allowed; - - /* do not allow window to shrink */ - if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; @@ -3036,36 +3024,6 @@ u32 __tcp_select_window(struct sock *sk) } return window; - -shrink_window_allowed: - /* new window should always be an exact multiple of scaling factor */ - free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale); - - if (free_space < (full_space >> 1)) { - icsk->icsk_ack.quick = 0; - - if (tcp_under_memory_pressure(sk)) - tcp_adjust_rcv_ssthresh(sk); - - /* if free space is too low, return a zero window */ - if (free_space < (allowed_space >> 4) || free_space < mss || - free_space < (1 << tp->rx_opt.rcv_wscale)) - return 0; - } - - if (free_space > tp->rcv_ssthresh) { - free_space = tp->rcv_ssthresh; - /* new window should always be an exact multiple of scaling factor - * - * For this case, we ALIGN "up" (increase free_space) because - * we know free_space is not zero here, it has been reduced from - * the memory-based limit, and rcv_ssthresh is not a hard limit - * (unlike sk_rcvbuf). - */ - free_space = ALIGN(free_space, (1 << tp->rx_opt.rcv_wscale)); - } - - return free_space; } void tcp_skb_collapse_tstamp(struct sk_buff *skb,