mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-09 04:10:18 +09:00
tcp: switch pacing timer to softirq based hrtimer
Linux 4.16 gained support for softirq-based hrtimers.
TCP can switch its pacing hrtimer to this variant, since this
avoids going through a tasklet and some atomic operations.
The pacing timer logic now looks like that of the other (jiffies-based) TCP timers.
v2: use hrtimer_try_to_cancel() in tcp_clear_xmit_timers()
to correctly release the reference on the socket if needed.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
4cbd7a7d3c
commit
73a6bab5aa
@@ -557,7 +557,9 @@ void tcp_fin(struct sock *sk);
|
|||||||
void tcp_init_xmit_timers(struct sock *);
|
void tcp_init_xmit_timers(struct sock *);
|
||||||
static inline void tcp_clear_xmit_timers(struct sock *sk)
|
static inline void tcp_clear_xmit_timers(struct sock *sk)
|
||||||
{
|
{
|
||||||
hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
|
if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
|
||||||
|
sock_put(sk);
|
||||||
|
|
||||||
inet_csk_clear_xmit_timers(sk);
|
inet_csk_clear_xmit_timers(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -772,7 +772,7 @@ struct tsq_tasklet {
|
|||||||
};
|
};
|
||||||
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
|
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
|
||||||
|
|
||||||
static void tcp_tsq_handler(struct sock *sk)
|
static void tcp_tsq_write(struct sock *sk)
|
||||||
{
|
{
|
||||||
if ((1 << sk->sk_state) &
|
if ((1 << sk->sk_state) &
|
||||||
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
|
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
|
||||||
@@ -789,6 +789,16 @@ static void tcp_tsq_handler(struct sock *sk)
|
|||||||
0, GFP_ATOMIC);
|
0, GFP_ATOMIC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void tcp_tsq_handler(struct sock *sk)
|
||||||
|
{
|
||||||
|
bh_lock_sock(sk);
|
||||||
|
if (!sock_owned_by_user(sk))
|
||||||
|
tcp_tsq_write(sk);
|
||||||
|
else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
|
||||||
|
sock_hold(sk);
|
||||||
|
bh_unlock_sock(sk);
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* One tasklet per cpu tries to send more skbs.
|
* One tasklet per cpu tries to send more skbs.
|
||||||
* We run in tasklet context but need to disable irqs when
|
* We run in tasklet context but need to disable irqs when
|
||||||
@@ -816,16 +826,7 @@ static void tcp_tasklet_func(unsigned long data)
|
|||||||
smp_mb__before_atomic();
|
smp_mb__before_atomic();
|
||||||
clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
|
clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
|
||||||
|
|
||||||
if (!sk->sk_lock.owned &&
|
tcp_tsq_handler(sk);
|
||||||
test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
|
|
||||||
bh_lock_sock(sk);
|
|
||||||
if (!sock_owned_by_user(sk)) {
|
|
||||||
clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
|
|
||||||
tcp_tsq_handler(sk);
|
|
||||||
}
|
|
||||||
bh_unlock_sock(sk);
|
|
||||||
}
|
|
||||||
|
|
||||||
sk_free(sk);
|
sk_free(sk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -853,9 +854,10 @@ void tcp_release_cb(struct sock *sk)
|
|||||||
nflags = flags & ~TCP_DEFERRED_ALL;
|
nflags = flags & ~TCP_DEFERRED_ALL;
|
||||||
} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
|
} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
|
||||||
|
|
||||||
if (flags & TCPF_TSQ_DEFERRED)
|
if (flags & TCPF_TSQ_DEFERRED) {
|
||||||
tcp_tsq_handler(sk);
|
tcp_tsq_write(sk);
|
||||||
|
__sock_put(sk);
|
||||||
|
}
|
||||||
/* Here begins the tricky part :
|
/* Here begins the tricky part :
|
||||||
* We are called from release_sock() with :
|
* We are called from release_sock() with :
|
||||||
* 1) BH disabled
|
* 1) BH disabled
|
||||||
@@ -929,7 +931,7 @@ void tcp_wfree(struct sk_buff *skb)
|
|||||||
if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
|
if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
|
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
|
||||||
nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
|
nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
|
||||||
if (nval != oval)
|
if (nval != oval)
|
||||||
continue;
|
continue;
|
||||||
@@ -948,37 +950,17 @@ out:
|
|||||||
sk_free(sk);
|
sk_free(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Note: Called under hard irq.
|
/* Note: Called under soft irq.
|
||||||
* We can not call TCP stack right away.
|
* We can call TCP stack right away, unless socket is owned by user.
|
||||||
*/
|
*/
|
||||||
enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
|
enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
|
||||||
{
|
{
|
||||||
struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
|
struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
|
||||||
struct sock *sk = (struct sock *)tp;
|
struct sock *sk = (struct sock *)tp;
|
||||||
unsigned long nval, oval;
|
|
||||||
|
|
||||||
for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
|
tcp_tsq_handler(sk);
|
||||||
struct tsq_tasklet *tsq;
|
sock_put(sk);
|
||||||
bool empty;
|
|
||||||
|
|
||||||
if (oval & TSQF_QUEUED)
|
|
||||||
break;
|
|
||||||
|
|
||||||
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
|
|
||||||
nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
|
|
||||||
if (nval != oval)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
|
|
||||||
break;
|
|
||||||
/* queue this socket to tasklet queue */
|
|
||||||
tsq = this_cpu_ptr(&tsq_tasklet);
|
|
||||||
empty = list_empty(&tsq->head);
|
|
||||||
list_add(&tp->tsq_node, &tsq->head);
|
|
||||||
if (empty)
|
|
||||||
tasklet_schedule(&tsq->tasklet);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return HRTIMER_NORESTART;
|
return HRTIMER_NORESTART;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1011,7 +993,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
|
|||||||
do_div(len_ns, rate);
|
do_div(len_ns, rate);
|
||||||
hrtimer_start(&tcp_sk(sk)->pacing_timer,
|
hrtimer_start(&tcp_sk(sk)->pacing_timer,
|
||||||
ktime_add_ns(ktime_get(), len_ns),
|
ktime_add_ns(ktime_get(), len_ns),
|
||||||
HRTIMER_MODE_ABS_PINNED);
|
HRTIMER_MODE_ABS_PINNED_SOFT);
|
||||||
|
sock_hold(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
|
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
|
||||||
@@ -1078,7 +1061,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
|
|||||||
|
|
||||||
/* if no packet is in qdisc/device queue, then allow XPS to select
|
/* if no packet is in qdisc/device queue, then allow XPS to select
|
||||||
* another queue. We can be called from tcp_tsq_handler()
|
* another queue. We can be called from tcp_tsq_handler()
|
||||||
* which holds one reference to sk_wmem_alloc.
|
* which holds one reference to sk.
|
||||||
*
|
*
|
||||||
* TODO: Ideally, in-flight pure ACK packets should not matter here.
|
* TODO: Ideally, in-flight pure ACK packets should not matter here.
|
||||||
* One way to get this would be to set skb->truesize = 2 on them.
|
* One way to get this would be to set skb->truesize = 2 on them.
|
||||||
@@ -2185,7 +2168,7 @@ static int tcp_mtu_probe(struct sock *sk)
|
|||||||
static bool tcp_pacing_check(const struct sock *sk)
|
static bool tcp_pacing_check(const struct sock *sk)
|
||||||
{
|
{
|
||||||
return tcp_needs_internal_pacing(sk) &&
|
return tcp_needs_internal_pacing(sk) &&
|
||||||
hrtimer_active(&tcp_sk(sk)->pacing_timer);
|
hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TCP Small Queues :
|
/* TCP Small Queues :
|
||||||
@@ -2365,8 +2348,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
|||||||
skb, limit, mss_now, gfp)))
|
skb, limit, mss_now, gfp)))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
|
|
||||||
clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
|
|
||||||
if (tcp_small_queue_check(sk, skb, 0))
|
if (tcp_small_queue_check(sk, skb, 0))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|||||||
@@ -713,6 +713,6 @@ void tcp_init_xmit_timers(struct sock *sk)
|
|||||||
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
|
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
|
||||||
&tcp_keepalive_timer);
|
&tcp_keepalive_timer);
|
||||||
hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
|
hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
|
||||||
HRTIMER_MODE_ABS_PINNED);
|
HRTIMER_MODE_ABS_PINNED_SOFT);
|
||||||
tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
|
tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user