From f015c92c49584ee3c2e7883cb497706544e33c9b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 11 Aug 2022 12:33:39 +0200 Subject: [PATCH] Revert "ipv4/tcp: do not use per netns ctl sockets" This reverts commit a9ef8b2589885851c769982ef34cb01348fb07bb which is commit 37ba017dcc3b1123206808979834655ddcf93251 upstream. It breaks the Android kernel ABI and is not needed for Android devices, so it is safe to revert for now. If it is determined that it is needed in the future, it can be brought back in an abi-preserving way. Bug: 161946584 Signed-off-by: Greg Kroah-Hartman Change-Id: Ie1e6f2b24c5e360577bc66fde8f969eeea96e99d --- include/net/netns/ipv4.h | 1 + net/ipv4/tcp_ipv4.c | 61 ++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 033d5c8ffdd6..fb94551d03e6 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -72,6 +72,7 @@ struct netns_ipv4 { struct sock *mc_autojoin_sk; struct inet_peer_base *peers; + struct sock * __percpu *tcp_sk; struct fqdir *fqdir; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 275ae42be99e..8bd7b1ec3b6a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -91,8 +91,6 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); - static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -796,8 +794,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(ipv4_tcp_sk); - sock_net_set(ctl_sk, net); + ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; @@ -812,7 +809,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) transmit_time); ctl_sk->sk_mark = 0; - sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); @@ -896,8 +892,7 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(ipv4_tcp_sk); - sock_net_set(ctl_sk, net); + ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? @@ -910,7 +905,6 @@ static void tcp_v4_send_ack(const struct sock *sk, transmit_time); ctl_sk->sk_mark = 0; - sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); } @@ -2834,14 +2828,41 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { + int cpu; + if (net->ipv4.tcp_congestion_control) bpf_module_put(net->ipv4.tcp_congestion_control, net->ipv4.tcp_congestion_control->owner); + + for_each_possible_cpu(cpu) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); + free_percpu(net->ipv4.tcp_sk); } static int __net_init tcp_sk_init(struct net *net) { - int cnt; + int res, cpu, cnt; + + net->ipv4.tcp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.tcp_sk) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, net); + if (res) + goto fail; + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; + } net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_ecn_fallback = 1; @@ -2926,6 +2947,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_congestion_control = &tcp_reno; return 0; +fail: + tcp_sk_exit(net); + + return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) @@ -3002,24 +3027,6 @@ static void __init bpf_iter_register(void) void __init tcp_v4_init(void) { - int cpu, res; - - for_each_possible_cpu(cpu) { - struct sock *sk; - - res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, - IPPROTO_TCP, &init_net); - if (res) - panic("Failed to create the TCP control socket.\n"); - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - - /* Please enforce IP_DF and IPID==0 for RST and - * ACK sent in SYN-RECV and TIME-WAIT state. - */ - inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; - - per_cpu(ipv4_tcp_sk, cpu) = sk; - } if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n");