mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 18:41:58 +09:00
rcu/tree: Defer setting of jiffies during stall reset
commit b96e7a5fa0ba9cda32888e04f8f4bac42d49a7f8 upstream.
There are instances where rcu_cpu_stall_reset() is called when jiffies
has not had a chance to update for a long time. Before jiffies is
updated, the CPU stall detector can go off, triggering false positives
where a just-started grace period appears to be ages old. In the past,
we disabled stall detection in rcu_cpu_stall_reset(); however, this got
changed [1]. This results in false positives in the KGDB use case [2].
Fix this by deferring the update of rcu_state.jiffies_stall (and the
jiffies read it is computed from) to the third run of the FQS loop.
This is more robust because, even if rcu_cpu_stall_reset() is called
just before jiffies is read, we end up pushing out the jiffies read by
3 more FQS loops. Meanwhile, CPU stall detection is delayed, so we will
not get any false positives.
[1] https://lore.kernel.org/all/20210521155624.174524-2-senozhatsky@chromium.org/
[2] https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/
Also tested with the rcutorture.cpu_stall option to verify stall
behavior with and without the patch.
Tested-by: Huacai Chen <chenhuacai@loongson.cn>
Reported-by: Binbin Zhou <zhoubinbin@loongson.cn>
Closes: https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/
Suggested-by: Paul McKenney <paulmck@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: a80be428fb ("rcu: Do not disable GP stall detection in rcu_cpu_stall_reset()")
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
6aa3cab6be
commit
8d837850d3
@@ -1604,10 +1604,22 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
|
|||||||
*/
|
*/
|
||||||
static void rcu_gp_fqs(bool first_time)
|
static void rcu_gp_fqs(bool first_time)
|
||||||
{
|
{
|
||||||
|
int nr_fqs = READ_ONCE(rcu_state.nr_fqs_jiffies_stall);
|
||||||
struct rcu_node *rnp = rcu_get_root();
|
struct rcu_node *rnp = rcu_get_root();
|
||||||
|
|
||||||
WRITE_ONCE(rcu_state.gp_activity, jiffies);
|
WRITE_ONCE(rcu_state.gp_activity, jiffies);
|
||||||
WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);
|
WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);
|
||||||
|
|
||||||
|
WARN_ON_ONCE(nr_fqs > 3);
|
||||||
|
/* Only countdown nr_fqs for stall purposes if jiffies moves. */
|
||||||
|
if (nr_fqs) {
|
||||||
|
if (nr_fqs == 1) {
|
||||||
|
WRITE_ONCE(rcu_state.jiffies_stall,
|
||||||
|
jiffies + rcu_jiffies_till_stall_check());
|
||||||
|
}
|
||||||
|
WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs);
|
||||||
|
}
|
||||||
|
|
||||||
if (first_time) {
|
if (first_time) {
|
||||||
/* Collect dyntick-idle snapshots. */
|
/* Collect dyntick-idle snapshots. */
|
||||||
force_qs_rnp(dyntick_save_progress_counter);
|
force_qs_rnp(dyntick_save_progress_counter);
|
||||||
|
|||||||
@@ -365,6 +365,10 @@ struct rcu_state {
|
|||||||
/* in jiffies. */
|
/* in jiffies. */
|
||||||
unsigned long jiffies_stall; /* Time at which to check */
|
unsigned long jiffies_stall; /* Time at which to check */
|
||||||
/* for CPU stalls. */
|
/* for CPU stalls. */
|
||||||
|
int nr_fqs_jiffies_stall; /* Number of fqs loops after
|
||||||
|
* which read jiffies and set
|
||||||
|
* jiffies_stall. Stall
|
||||||
|
* warnings disabled if !0. */
|
||||||
unsigned long jiffies_resched; /* Time at which to resched */
|
unsigned long jiffies_resched; /* Time at which to resched */
|
||||||
/* a reluctant CPU. */
|
/* a reluctant CPU. */
|
||||||
unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */
|
unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */
|
||||||
|
|||||||
@@ -149,12 +149,17 @@ static void panic_on_rcu_stall(void)
|
|||||||
/**
|
/**
|
||||||
* rcu_cpu_stall_reset - restart stall-warning timeout for current grace period
|
* rcu_cpu_stall_reset - restart stall-warning timeout for current grace period
|
||||||
*
|
*
|
||||||
|
* To perform the reset request from the caller, disable stall detection until
|
||||||
|
* 3 fqs loops have passed. This is required to ensure a fresh jiffies is
|
||||||
|
* loaded. It should be safe to do from the fqs loop as enough timer
|
||||||
|
* interrupts and context switches should have passed.
|
||||||
|
*
|
||||||
* The caller must disable hard irqs.
|
* The caller must disable hard irqs.
|
||||||
*/
|
*/
|
||||||
void rcu_cpu_stall_reset(void)
|
void rcu_cpu_stall_reset(void)
|
||||||
{
|
{
|
||||||
WRITE_ONCE(rcu_state.jiffies_stall,
|
WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3);
|
||||||
jiffies + rcu_jiffies_till_stall_check());
|
WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -170,6 +175,7 @@ static void record_gp_stall_check_time(void)
|
|||||||
WRITE_ONCE(rcu_state.gp_start, j);
|
WRITE_ONCE(rcu_state.gp_start, j);
|
||||||
j1 = rcu_jiffies_till_stall_check();
|
j1 = rcu_jiffies_till_stall_check();
|
||||||
smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
|
smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
|
||||||
|
WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 0);
|
||||||
WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
|
WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
|
||||||
rcu_state.jiffies_resched = j + j1 / 2;
|
rcu_state.jiffies_resched = j + j1 / 2;
|
||||||
rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
|
rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
|
||||||
@@ -694,6 +700,16 @@ static void check_cpu_stall(struct rcu_data *rdp)
|
|||||||
!rcu_gp_in_progress())
|
!rcu_gp_in_progress())
|
||||||
return;
|
return;
|
||||||
rcu_stall_kick_kthreads();
|
rcu_stall_kick_kthreads();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if it was requested (via rcu_cpu_stall_reset()) that the FQS
|
||||||
|
* loop has to set jiffies to ensure a non-stale jiffies value. This
|
||||||
|
* is required to have good jiffies value after coming out of long
|
||||||
|
* breaks of jiffies updates. Not doing so can cause false positives.
|
||||||
|
*/
|
||||||
|
if (READ_ONCE(rcu_state.nr_fqs_jiffies_stall) > 0)
|
||||||
|
return;
|
||||||
|
|
||||||
j = jiffies;
|
j = jiffies;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
Reference in New Issue
Block a user