mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 10:31:46 +09:00
tcp: avoid premature drops in tcp_add_backlog()
[ Upstream commit ec00ed472bdb7d0af840da68c8c11bff9f4d9caa ]
While testing TCP performance with latest trees,
I saw suspect SOCKET_BACKLOG drops.
tcp_add_backlog() computes its limit with :
limit = (u32)READ_ONCE(sk->sk_rcvbuf) +
(u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
limit += 64 * 1024;
This does not take into account that sk->sk_backlog.len
is reset only at the very end of __release_sock().
Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
sk_rcvbuf in normal conditions.
We should double sk->sk_rcvbuf contribution in the formula
to absorb bubbles in the backlog, which happen more often
for very fast flows.
This change maintains decent protection against abuses.
Fixes: c377411f24 ("net: sk_add_backlog() take rmem_alloc into account")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240423125620.3309458-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
3616b4e1f1
commit
131490afa2
@@ -1771,7 +1771,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
|
|||||||
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
|
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
|
||||||
enum skb_drop_reason *reason)
|
enum skb_drop_reason *reason)
|
||||||
{
|
{
|
||||||
u32 limit, tail_gso_size, tail_gso_segs;
|
u32 tail_gso_size, tail_gso_segs;
|
||||||
struct skb_shared_info *shinfo;
|
struct skb_shared_info *shinfo;
|
||||||
const struct tcphdr *th;
|
const struct tcphdr *th;
|
||||||
struct tcphdr *thtail;
|
struct tcphdr *thtail;
|
||||||
@@ -1780,6 +1780,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
|
|||||||
bool fragstolen;
|
bool fragstolen;
|
||||||
u32 gso_segs;
|
u32 gso_segs;
|
||||||
u32 gso_size;
|
u32 gso_size;
|
||||||
|
u64 limit;
|
||||||
int delta;
|
int delta;
|
||||||
|
|
||||||
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
|
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
|
||||||
@@ -1877,7 +1878,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
|
|||||||
__skb_push(skb, hdrlen);
|
__skb_push(skb, hdrlen);
|
||||||
|
|
||||||
no_coalesce:
|
no_coalesce:
|
||||||
limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
|
/* sk->sk_backlog.len is reset only at the end of __release_sock().
|
||||||
|
* Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
|
||||||
|
* sk_rcvbuf in normal conditions.
|
||||||
|
*/
|
||||||
|
limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1;
|
||||||
|
|
||||||
|
limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1;
|
||||||
|
|
||||||
/* Only socket owner can try to collapse/prune rx queues
|
/* Only socket owner can try to collapse/prune rx queues
|
||||||
* to reduce memory overhead, so add a little headroom here.
|
* to reduce memory overhead, so add a little headroom here.
|
||||||
@@ -1885,6 +1892,8 @@ no_coalesce:
|
|||||||
*/
|
*/
|
||||||
limit += 64 * 1024;
|
limit += 64 * 1024;
|
||||||
|
|
||||||
|
limit = min_t(u64, limit, UINT_MAX);
|
||||||
|
|
||||||
if (unlikely(sk_add_backlog(sk, skb, limit))) {
|
if (unlikely(sk_add_backlog(sk, skb, limit))) {
|
||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
|
*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
|
||||||
|
|||||||
Reference in New Issue
Block a user